library(tidyverse)
library(plotly)
library(sf)
library(mapview)
library(tigris)
library(censusapi)
library(leaflet)
library(lehdr)


options(
  tigris_class = "sf",
  tigris_use_cache = TRUE
)

Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")

Load social distancing data and blockgroups

Load the Safegraph social distancing data and San Jose blockgroups

# get SJ blockgroups 
# get San Jose block groups
scc_blockgroups <- block_groups("CA","Santa Clara", cb=F, progress_bar=F)

# Use tracts sent to us by San Jose
sj_tracts <- st_read("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/CSJ_Census_Tracts/CSJ_Census_Tracts.shp") %>%
  st_as_sf() %>%
  st_transform(st_crs(scc_blockgroups))
## Reading layer `CSJ_Census_Tracts' from data source `/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/CSJ_Census_Tracts/CSJ_Census_Tracts.shp' using driver `ESRI Shapefile'
## Simple feature collection with 219 features and 9 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 6112856 ymin: 1869687 xmax: 6255982 ymax: 1996555
## epsg (SRID):    2227
## proj4string:    +proj=lcc +lat_1=38.43333333333333 +lat_2=37.06666666666667 +lat_0=36.5 +lon_0=-120.5 +x_0=2000000.0001016 +y_0=500000.0001016001 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=us-ft +no_defs
sj_citycouncil_disticts <- st_read("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/City Council Districts/CITY_COUNCIL_DISTRICTS.shp") %>%
  st_as_sf() %>%
  st_transform(st_crs(scc_blockgroups))
## Reading layer `CITY_COUNCIL_DISTRICTS' from data source `/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/City Council Districts/CITY_COUNCIL_DISTRICTS.shp' using driver `ESRI Shapefile'
## Simple feature collection with 10 features and 7 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: 6112856 ymin: 1869687 xmax: 6255982 ymax: 1996555
## epsg (SRID):    2227
## proj4string:    +proj=lcc +lat_1=38.43333333333333 +lat_2=37.06666666666667 +lat_0=36.5 +lon_0=-120.5 +x_0=2000000.0001016 +y_0=500000.0001016001 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=us-ft +no_defs
# from code written by others to get SJ blockgroups
sj_blockgroups <-
  scc_blockgroups %>%
  st_centroid() %>%
  st_join(sj_tracts, left = F) %>%
  st_join(sj_citycouncil_disticts%>% dplyr::select(DISTRICTS)) %>%
  mutate(
    DISTRICTS = DISTRICTS %>% factor(levels = c("1","2","3","4","5","6","7","8","9","10"))
  ) %>%
  st_set_geometry(NULL) %>%
  left_join(scc_blockgroups%>% dplyr::select(GEOID), by = "GEOID") %>%
  st_as_sf() %>%
  dplyr::select(GEOID, DISTRICTS)

# the spatial join leaves off two blockgroups which are touching district 9. The following code assigns those to district 9
sj_blockgroups$DISTRICTS[is.na(sj_blockgroups$DISTRICTS)] <- 9

# code from others in the class to get social distancing data 
sj_socialdistancing <- readRDS("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/sj_socialdistancing.rds") %>% 
  mutate(date = date_range_start %>%  substr(1,10) %>% as.Date()) %>% 
  left_join(sj_blockgroups, by = c("origin_census_block_group" = "GEOID")) %>% 
  filter(!is.na(DISTRICTS))

# obtaining weekends
weekends <-
  sj_socialdistancing %>% 
  filter(!duplicated(date)) %>% 
  arrange(date) %>% 
  mutate(
    weekend = 
      ifelse(
        (date %>% as.numeric()) %% 7 %in% c(2,3),
        T,
        F
      )
  ) %>% 
  dplyr::select(date,weekend)

sj_socialdistancing <- 
  sj_socialdistancing %>% 
  left_join(weekends)

# date of the shelter in place order
shelter_start <- "2020-03-16" %>% as.Date()

# get average staying at home on weekdays in January and February
sj_pre_sd_at_home_average <- sj_socialdistancing %>% 
  filter(weekend == F) %>% 
  filter(date <  as.Date("2020-03-01")) %>%
  group_by(origin_census_block_group) %>% 
  summarize(completely_home_device_count = sum(completely_home_device_count), device_count = sum(device_count)) %>% 
  mutate(`% Completely at Home Pre Shelter` = (completely_home_device_count/device_count*100) %>% round(1), `% not completely at home pre shelter` = (100 - `% Completely at Home Pre Shelter`))

Obtaining demographic variables

Here I obtain various demographic data, including income (percent below 50% and 80% of area median income), vehicle ownership, age, English language ability, and occupants per room.

# obtain the saved census data 
setwd("~/Documents/2020 Spring Quarter/CEE 218Z")
acs_vars = readRDS("censusData2018_acs_acs5.rds")
setwd("~/Documents/2020 Spring Quarter/CEE 218Z/covid19")
# load in income data - code adapted from other students
sj_median_income_by_block <-
  getCensus(
    name = "acs/acs5",
    vintage = 2018,
    region = "block group:*", 
    regionin = "state:06+county:085",
    vars = "B19013_001E"
  ) %>%
  mutate(
    blockgroup =
      paste0(state,county,tract,block_group)
  ) %>% 
  select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>% 
  rename(
    Median_Income = B19013_001E 
  ) %>% 
  filter(!is.na(Median_Income)) %>% 
  left_join(sj_blockgroups, by = c("blockgroup" = "GEOID")) %>% #this code gives each blockgroup a district designation
  filter(
    !is.na(DISTRICTS)
  ) %>% 
  
  # this code joins our census data with the social distancing data, processed as shown below
  left_join(sj_socialdistancing %>%  
                          filter(weekend == F) %>% 
                          filter(date > shelter_start) %>%
                          group_by(origin_census_block_group) %>% 
                          summarize(
                                    completely_home_device_count = sum(completely_home_device_count),
                                    device_count = sum(device_count)) %>% 
                          mutate(`% Completely at Home` = (completely_home_device_count/device_count*100) %>% round(1), 
                                 `% not completely at home` = (100 - `% Completely at Home`)),
            by = c("blockgroup" = "origin_census_block_group")
  ) %>% 
  filter(
    !is.na(device_count)
  ) %>% 
  left_join(sj_pre_sd_at_home_average %>% dplyr::select(origin_census_block_group, `% Completely at Home Pre Shelter`, `% not completely at home pre shelter`), by = c("blockgroup" = "origin_census_block_group"))

sj_ami_by_block <-
  getCensus(
    name = "acs/acs5",
    vintage = 2018,
    region = "block group:*", 
    regionin = "state:06+county:085",
    vars = "group(B19001)"
  ) %>%
  mutate(
    blockgroup =
      paste0(state,county,tract,block_group)
  ) %>% 
  select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>% 
  dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
  group_by(blockgroup) %>% 
  summarize(
    Total = B19001_001E,
    `Under 75,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E),
    #sum(lapply(2:12, function(x) as.name(paste0("B19001_00",x,"E"))))
    `Under 100,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E, B19001_013E), 
    `Under 125,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E, B19001_013E, B19001_014E)
  ) %>% 
  mutate(
    `% under 75,000` = `Under 75,000` / Total * 100,
    `% over 75,000` = (100 - `% under 75,000`),
    `% under 100,000` = `Under 100,000` / Total * 100,
    `% over 100,000` = (100 - `% under 100,000`),
    `% under 125,000` = `Under 125,000` / Total * 100,
    `% over 125,000` = (100 - `% under 125,000`),
  ) %>% 
  left_join(sj_median_income_by_block %>% dplyr::select(-Median_Income)
  ) %>% 
  filter(!is.na(device_count))
# loading in language data - code adapted from other students
sj_lang_by_block <-
  getCensus(
    name = "acs/acs5",
    vintage = 2018,
    region = "block group:*", 
    regionin = "state:06+county:085",
    vars = "group(B16004)"
  )  %>% 
  mutate(
    blockgroup =
      paste0(state,county,tract,block_group)
  ) %>% 
  select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>% 
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(
    key = "variable",
    value = "estimate", 
    - blockgroup
  ) %>% 
  left_join(acs_vars, by = c("variable" = "name")) %>% 
  mutate(
    tier = substr(label,lapply(label, function(x) max(unlist(gregexpr('!!',x)))+2),nchar(label))
  ) %>% 
  filter(tier %in% c('Speak English "not well"', 
                     'Speak English "not at all"', 
                     'Total', 'Speak Spanish', 
                     'Speak Asian and Pacific Island languages')) %>% 
  group_by(blockgroup, tier) %>% 
  summarise(
    estimate1 = sum(estimate)
  ) %>% 
  spread(
    key = "tier",
    value = "estimate1"
  ) %>% 
  mutate(
    `% speaking english < well` = (`Speak English "not well"` + `Speak English "not at all"`) / Total * 100,
    `% speaking english > well` = (100 - `% speaking english < well`),
    `% speaking spanish` = (`Speak Spanish`/ Total) * 100,
    `% not speaking spanish` = (100 - `% speaking spanish`),
    `% speaking api` = (`Speak Asian and Pacific Island languages` / Total) * 100
  ) %>% 
  left_join(sj_median_income_by_block %>% dplyr::select(-Median_Income)) %>% 
  filter(!is.na(device_count)) %>% 
  mutate(log_perc = log(`% speaking english < well`))
# loading in age data - specifically looking at percentage 65+ and percentage <30
sj_age_by_block <- getCensus(
    name = "acs/acs5",
    vintage = 2018,
    region = "block group:*", 
    regionin = "state:06+county:085",
    vars = "group(B01001)"
  ) %>% 
  mutate(
    blockgroup =
      paste0(state,county,tract,block_group)
  ) %>% 
  select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>% 
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(
    key = "variable",
    value = "estimate", 
    - blockgroup
  ) %>% 
  mutate(
    label = acs_vars$label[match(variable,acs_vars$name)]
  ) %>% 
  select(-variable) %>% 
  separate(
    label,
    into = c(NA,NA,"sex","age"),
    sep = "!!"
  ) %>% filter(!is.na(age)) %>% 
  mutate(elderly = ifelse(age %in% c("65 and 66 years", "67 to 69 years", "70 to 74 years", "75 to 79 years", "80 to 84 years", "85 years and over"), estimate, NA), `less than 30` = ifelse(age %in% c("Under 5 years", "5 to 9 years", "10 to 14 years", "15 to 17 years", "18 and 19 years", "20 years", "21 years", "22 to 24 years", "25 to 29 years"), estimate, NA), `less than 18` = ifelse(age %in% c("Under 5 years", "5 to 9 years", "10 to 14 years", "15 to 17 years"), estimate, NA), `20-29` = ifelse(age %in% c("20 years", "21 years", "22 to 24 years", "25 to 29 years"), estimate, NA)) %>% 
  group_by(blockgroup) %>% 
  summarize(elderly = sum(elderly, na.rm = T), `less than 30` = sum(`less than 30`, na.rm = T), total = sum(estimate, na.rm = T), `less than 18` = sum(`less than 18`, na.rm = T), `20-29` = sum(`20-29`, na.rm = T)) %>% 
  mutate(`percent elderly` = elderly*100 / total, `percent less than 30` = `less than 30`*100 / total, `percent nonelderly` = (100 - `percent elderly`), `percent less than 18` = `less than 18`*100/total, `percent 20-29` = `20-29`*100/total) %>% 
  left_join(sj_median_income_by_block %>% dplyr::select(-Median_Income)) %>% 
  filter(!is.na(device_count))

# keep all age categories separated
sj_all_age_by_block <- getCensus(
    name = "acs/acs5",
    vintage = 2018,
    region = "block group:*", 
    regionin = "state:06+county:085",
    vars = "group(B01001)"
  ) %>% 
  mutate(
    blockgroup =
      paste0(state,county,tract,block_group)
  ) %>% 
  select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>% 
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(
    key = "variable",
    value = "estimate", 
    - blockgroup
  ) %>% 
  mutate(
    label = acs_vars$label[match(variable,acs_vars$name)]
  ) %>% 
  select(-variable) %>% 
  separate(
    label,
    into = c(NA,NA,"sex","age"),
    sep = "!!"
  ) %>% filter(!is.na(age)) %>% 
  group_by(blockgroup, age) %>%
  summarize(total_by_age = sum(estimate)) %>%
  spread(key = age, value = total_by_age) %>%
  left_join(sj_age_by_block %>% dplyr::select(blockgroup, total)) %>% 
  left_join(sj_median_income_by_block %>% dplyr::select(device_count, blockgroup)) %>% 
  filter(!is.na(device_count)) %>%
  dplyr::select(-device_count)
# get data on vehicles available as vehicles allocation
sj_vehicles_by_block <- getCensus(
    name = "acs/acs5",
    vintage = 2018,
    region = "block group:*", 
    regionin = "state:06+county:085",
    vars = "group(B992512)"
  ) %>% 
  mutate(
    blockgroup =
      paste0(state,county,tract,block_group)
  ) %>% 
  select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>% 
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  dplyr::select(B992512_001E, blockgroup) %>%
  rename(total_vehicles = B992512_001E, blockgroup = blockgroup) %>%
  left_join(sj_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
  mutate(`vehicles per capita` = total_vehicles / total) %>%
  filter(!is.na(device_count)) 

# also get data on vehicles available as households without a vehicle
sj_no_vehicles_by_block <- getCensus(
    name = "acs/acs5",
    vintage = 2018,
    region = "block group:*", 
    regionin = "state:06+county:085",
    vars = "group(B25044)"
  ) %>% 
  mutate(
    blockgroup =
      paste0(state,county,tract,block_group)
  ) %>% 
  select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>% 
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  select(-variable) %>%
  separate(label, into = c(NA, NA, NA,"vehicles"), sep = "!!") %>% 
  filter(!is.na(vehicles)) %>%
  group_by(blockgroup, vehicles) %>%
  summarize(grouped_vehicles = sum(estimate)) %>%
  spread(key = vehicles, value = grouped_vehicles) %>%
  mutate(total_nums = `1 vehicle available` + `2 vehicles available` + `3 vehicles available` + `4 vehicles available` + `5 or more vehicles available` + `No vehicle available`, `percent no vehicles` = `No vehicle available`*100 / total_nums, `percent with vehicles` = (100-`percent no vehicles`)) %>%
  left_join(sj_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
  filter(!is.na(device_count))
# get data on occupants per room
sj_occupants_per_room_by_block <- getCensus(
    name = "acs/acs5",
    vintage = 2018,
    region = "block group:*", 
    regionin = "state:06+county:085",
    vars = "group(B25014)"
  ) %>% 
  mutate(
    blockgroup =
      paste0(state,county,tract,block_group)
  ) %>% 
  select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>% 
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  select(-variable) %>% 
  separate(label, into = c(NA, NA, NA,"occupants per room"), sep = "!!") %>% 
  filter(!is.na(`occupants per room`)) %>%
  group_by(blockgroup, `occupants per room`) %>%
  summarize(estimate_tot = sum(estimate)) %>% 
  spread(key = `occupants per room`, value = estimate_tot) %>%
  mutate(total_nums = `0.50 or less occupants per room` + `0.51 to 1.00 occupants per room` + `1.01 to 1.50 occupants per room` + `1.51 to 2.00 occupants per room` + `2.01 or more occupants per room`, `percent 1 or more` = (`1.01 to 1.50 occupants per room` + `1.51 to 2.00 occupants per room` + `2.01 or more occupants per room`) * 100/ total_nums, `percent less than 1` = (100-`percent 1 or more`)) %>%
  left_join(sj_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
  filter(!is.na(device_count)) 

Testing correlations

In the plots below, I show the selected variables against percent of devices completely at home since the shelter-in-place order started, as well as against percent of devices pre-shelter-in-place for comparison.

Age

# age
sj_age_by_block %>%
  ggplot(aes(
  x = `percent less than 30`,
  y = `% not completely at home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
labs(
    x = "Percent of residents younger than 30",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and Young Age Groups"
  )

young_model <- lm(sj_age_by_block$`% not completely at home` ~ sj_age_by_block$`percent less than 30`)
summary(young_model)
## 
## Call:
## lm(formula = sj_age_by_block$`% not completely at home` ~ sj_age_by_block$`percent less than 30`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.831  -4.881  -0.287   4.243  39.516 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            44.78445    1.49842  29.888  < 2e-16 ***
## sj_age_by_block$`percent less than 30`  0.17835    0.03798   4.695 3.34e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.216 on 567 degrees of freedom
## Multiple R-squared:  0.03743,    Adjusted R-squared:  0.03573 
## F-statistic: 22.05 on 1 and 567 DF,  p-value: 3.34e-06
sj_age_by_block %>% filter(`percent elderly` < 50) %>% # get rid of extreme outliers
  ggplot(aes(
  x = `percent elderly`,
  y = `% not completely at home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents 65 and older",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and Elderly Population"
  )

elderly_model <- lm(`% not completely at home` ~ `percent elderly`, sj_age_by_block %>% filter(`percent elderly` < 50))
summary(elderly_model)
## 
## Call:
## lm(formula = `% not completely at home` ~ `percent elderly`, 
##     data = sj_age_by_block %>% filter(`percent elderly` < 50))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.854  -5.158  -0.420   4.239  35.422 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       53.85340    0.77548  69.446  < 2e-16 ***
## `percent elderly` -0.17447    0.05355  -3.258  0.00119 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.281 on 564 degrees of freedom
## Multiple R-squared:  0.01848,    Adjusted R-squared:  0.01674 
## F-statistic: 10.62 on 1 and 564 DF,  p-value: 0.001188
sj_age_by_block %>% 
  ggplot(aes(
  x = `percent less than 18`,
  y = `% not completely at home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents less than 18",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and Child Population"
  )

child_model <- lm(`% not completely at home` ~ `percent less than 18`, sj_age_by_block)
summary(child_model)
## 
## Call:
## lm(formula = `% not completely at home` ~ `percent less than 18`, 
##     data = sj_age_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.935  -5.170  -0.281   4.450  40.831 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            54.54117    1.21057   45.05   <2e-16 ***
## `percent less than 18` -0.12948    0.05158   -2.51   0.0124 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.328 on 567 degrees of freedom
## Multiple R-squared:  0.01099,    Adjusted R-squared:  0.009244 
## F-statistic:   6.3 on 1 and 567 DF,  p-value: 0.01235
sj_age_by_block %>% 
  ggplot(aes(
  x = `percent 20-29`,
  y = `% not completely at home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents ages 20-29",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and Population Ages 20-29"
  )

young_adult_model <- lm(`% not completely at home` ~ `percent 20-29`, sj_age_by_block)
summary(young_adult_model)
## 
## Call:
## lm(formula = `% not completely at home` ~ `percent 20-29`, data = sj_age_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.675  -4.730  -0.263   4.256  40.315 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     48.08479    0.67144  71.614  < 2e-16 ***
## `percent 20-29`  0.25588    0.04177   6.126 1.68e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.11 on 567 degrees of freedom
## Multiple R-squared:  0.06208,    Adjusted R-squared:  0.06043 
## F-statistic: 37.53 on 1 and 567 DF,  p-value: 1.685e-09
# compare this to pre-shelter-in-place behavior
sj_age_by_block %>%
  ggplot(aes(
  x = `percent less than 30`,
  y = `% not completely at home pre shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
labs(
    x = "Percent of residents younger than 30",
    y = "Percent devices leaving home pre-shelter-in-place",
    title = "San Jose: Staying at Home and Young Age Groups Pre Shelter-in-Place"
  )

young_model2 <- lm(sj_age_by_block$`% not completely at home pre shelter` ~ sj_age_by_block$`percent less than 30`)
summary(young_model2)
## 
## Call:
## lm(formula = sj_age_by_block$`% not completely at home pre shelter` ~ 
##     sj_age_by_block$`percent less than 30`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.1939  -2.8160  -0.1557   2.9950  16.7071 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            81.87032    0.82253   99.53  < 2e-16 ***
## sj_age_by_block$`percent less than 30` -0.11072    0.02085   -5.31 1.57e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.51 on 567 degrees of freedom
## Multiple R-squared:  0.04738,    Adjusted R-squared:  0.0457 
## F-statistic:  28.2 on 1 and 567 DF,  p-value: 1.573e-07
sj_age_by_block %>% filter(`percent elderly` < 50) %>% # get rid of extreme outliers
  ggplot(aes(
  x = `percent elderly`,
  y = `% not completely at home pre shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents 65 and older",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "San Jose: Staying at Home and Elderly Population Pre Shelter-in-Place"
  )

elderly_model2 <- lm(`% not completely at home pre shelter` ~ `percent elderly`, sj_age_by_block %>% filter(`percent elderly` < 50))
summary(elderly_model2)
## 
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `percent elderly`, 
##     data = sj_age_by_block %>% filter(`percent elderly` < 50))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.236  -2.830  -0.158   3.145  14.296 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        75.9045     0.4257 178.295  < 2e-16 ***
## `percent elderly`   0.1329     0.0294   4.522 7.47e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.546 on 564 degrees of freedom
## Multiple R-squared:  0.03499,    Adjusted R-squared:  0.03328 
## F-statistic: 20.45 on 1 and 564 DF,  p-value: 7.466e-06
sj_age_by_block %>% 
  ggplot(aes(
  x = `percent less than 18`,
  y = `% not completely at home pre shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents less than 18",
    y = "Percent devices leaving home on weekdays pre shelter-in-place",
    title = "San Jose: Social Distancing and Child Population Pre Shelter"
  )

child_model2 <- lm(`% not completely at home pre shelter` ~ `percent less than 18`, sj_age_by_block)
summary(child_model2)
## 
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `percent less than 18`, 
##     data = sj_age_by_block)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -26.3901  -3.0050   0.0411   3.1602  12.4458 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            76.08044    0.66828 113.845   <2e-16 ***
## `percent less than 18`  0.06849    0.02848   2.405   0.0165 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.597 on 567 degrees of freedom
## Multiple R-squared:  0.0101, Adjusted R-squared:  0.008352 
## F-statistic: 5.784 on 1 and 567 DF,  p-value: 0.01649
sj_age_by_block %>% 
  ggplot(aes(
  x = `percent 20-29`,
  y = `% not completely at home pre shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents ages 20-29",
    y = "Percent devices leaving home on weekdays pre shelter-in-place",
    title = "San Jose: Social Distancing and Population Ages 20-29 Pre Shelter"
  )

young_adult_model2 <- lm(`% not completely at home pre shelter` ~ `percent 20-29`, sj_age_by_block)
summary(young_adult_model2)
## 
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `percent 20-29`, 
##     data = sj_age_by_block)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25.0695  -2.7320  -0.1156   2.8283  15.9003 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     80.23612    0.36071 222.440  < 2e-16 ***
## `percent 20-29` -0.18877    0.02244  -8.413 3.26e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.357 on 567 degrees of freedom
## Multiple R-squared:  0.111,  Adjusted R-squared:  0.1094 
## F-statistic: 70.78 on 1 and 567 DF,  p-value: 3.264e-16

Income

# income - less than $75000
sj_ami_by_block %>% 
  ggplot(aes(
  x = `% over 75,000`,
  y = `% not completely at home`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $75,000 (50% AMI) annually",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and Households Above 50% AMI"
  )

income_75_model <- lm(`% not completely at home` ~ `% over 75,000`, sj_ami_by_block)
summary(income_75_model)
## 
## Call:
## lm(formula = `% not completely at home` ~ `% over 75,000`, data = sj_ami_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -21.452  -4.792  -0.730   4.183  33.701 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     64.50250    1.10337   58.46   <2e-16 ***
## `% over 75,000` -0.20754    0.01701  -12.20   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.365 on 566 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2083, Adjusted R-squared:  0.2069 
## F-statistic: 148.9 on 1 and 566 DF,  p-value: < 2.2e-16
# income - less than $100000
sj_ami_by_block %>% 
  ggplot(aes(
  x = `% over 100,000`,
  y = `% not completely at home`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $100,000 (80% AMI) annually",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and Households Below 80% AMI"
  )

income_100_model <- lm(`% not completely at home` ~ `% over 100,000`, sj_ami_by_block)
summary(income_100_model)
## 
## Call:
## lm(formula = `% not completely at home` ~ `% over 100,000`, data = sj_ami_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.837  -4.737  -0.517   4.200  32.094 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      61.95439    0.86295   71.79   <2e-16 ***
## `% over 100,000` -0.20339    0.01582  -12.86   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.282 on 566 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2261, Adjusted R-squared:  0.2247 
## F-statistic: 165.3 on 1 and 566 DF,  p-value: < 2.2e-16
# income - less than $125000
sj_ami_by_block %>% 
  ggplot(aes(
  x = `% over 125,000`,
  y = `% not completely at home`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $125,000 annually",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and Households Below $125,000"
  )

income_125_model <- lm(`% not completely at home` ~ `% over 125,000`, sj_ami_by_block)
summary(income_125_model)
## 
## Call:
## lm(formula = `% not completely at home` ~ `% over 125,000`, data = sj_ami_by_block)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.4254  -4.5655  -0.6404   4.0420  31.1510 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      60.21130    0.72656   82.87   <2e-16 ***
## `% over 125,000` -0.20988    0.01604  -13.09   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.253 on 566 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2323, Adjusted R-squared:  0.231 
## F-statistic: 171.3 on 1 and 566 DF,  p-value: < 2.2e-16
# compare to pre shelter in place
sj_ami_by_block %>% 
  ggplot(aes(
  x = `% over 75,000`,
  y = `% not completely at home pre shelter`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $75,000 (50% AMI) annually",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "San Jose: Staying at Home and Households Above 50% AMI Pre Shelter-in-Place"
  )

income_75_model2 <- lm(`% not completely at home pre shelter` ~ `% over 75,000`, sj_ami_by_block)
summary(income_75_model2)
## 
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `% over 75,000`, 
##     data = sj_ami_by_block)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.4357  -2.7003  -0.1437   2.7764  16.6680 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     72.61447    0.65712 110.504  < 2e-16 ***
## `% over 75,000`  0.08029    0.01013   7.926 1.21e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.386 on 566 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.09991,    Adjusted R-squared:  0.09832 
## F-statistic: 62.83 on 1 and 566 DF,  p-value: 1.206e-14
# income - less than $100000
sj_ami_by_block %>% 
  ggplot(aes(
  x = `% over 100,000`,
  y = `% not completely at home pre shelter`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $100,000 (80% AMI) annually",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "San Jose: Staying Home and Households Below 80% AMI Pre Shelter-in-Place"
  )

income_100_model2 <- lm(`% not completely at home pre shelter` ~ `% over 100,000`, sj_ami_by_block)
summary(income_100_model2)
## 
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `% over 100,000`, 
##     data = sj_ami_by_block)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.5034  -2.6406   0.0803   2.5599  16.9387 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      73.26132    0.51177 143.152   <2e-16 ***
## `% over 100,000`  0.08532    0.00938   9.096   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.319 on 566 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1275, Adjusted R-squared:  0.126 
## F-statistic: 82.73 on 1 and 566 DF,  p-value: < 2.2e-16
# over 125000
sj_ami_by_block %>% 
  ggplot(aes(
  x = `% over 125,000`,
  y = `% not completely at home pre shelter`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $125,000 annually",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "San Jose: Social Distancing and Households Below $125,000 Pre Shelter-in-Place"
  )

income_125_model2 <- lm(`% not completely at home pre shelter` ~ `% over 125,000`, sj_ami_by_block)
summary(income_125_model2)
## 
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `% over 125,000`, 
##     data = sj_ami_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.353  -2.556   0.022   2.522  16.560 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      73.640242   0.425069   173.2   <2e-16 ***
## `% over 125,000`  0.096607   0.009382    10.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.243 on 566 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1578, Adjusted R-squared:  0.1563 
## F-statistic:   106 on 1 and 566 DF,  p-value: < 2.2e-16

Language

# language
sj_lang_by_block %>% 
  ggplot(aes(
  x = `% speaking english > well`,
  y = `% not completely at home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of individuals speaking English well",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and English Language Ability"
  )

english_ability_model <- lm(`% not completely at home` ~ `% speaking english > well`, sj_lang_by_block)
summary(english_ability_model)
## 
## Call:
## lm(formula = `% not completely at home` ~ `% speaking english > well`, 
##     data = sj_lang_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.827  -5.150  -0.441   3.895  38.759 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 67.27293    3.32870  20.210  < 2e-16 ***
## `% speaking english > well` -0.17632    0.03732  -4.724 2.92e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.214 on 567 degrees of freedom
## Multiple R-squared:  0.03787,    Adjusted R-squared:  0.03617 
## F-statistic: 22.32 on 1 and 567 DF,  p-value: 2.916e-06
sj_lang_by_block %>% 
  ggplot(aes(
  x = `% not speaking spanish`,
  y = `% not completely at home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of individuals not speaking Spanish",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and Spanish Language Ability"
  )

spanish_speaking_model <- lm(`% not completely at home` ~ `% not speaking spanish`, sj_lang_by_block)
summary(spanish_speaking_model)
## 
## Call:
## lm(formula = `% not completely at home` ~ `% not speaking spanish`, 
##     data = sj_lang_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.771  -4.456  -0.658   3.402  37.904 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              63.87426    1.30595  48.910   <2e-16 ***
## `% not speaking spanish` -0.15747    0.01627  -9.679   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.758 on 567 degrees of freedom
## Multiple R-squared:  0.1418, Adjusted R-squared:  0.1403 
## F-statistic: 93.69 on 1 and 567 DF,  p-value: < 2.2e-16
# compare to pre shelter in place
sj_lang_by_block %>% 
  ggplot(aes(
  x = `% speaking english > well`,
  y = `% not completely at home pre shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of individuals speaking English well",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "San Jose: Staying at Home and English Language Ability Pre Shelter-in-Place"
  )

english_ability_model2 <- lm(`% not completely at home pre shelter` ~ `% speaking english > well`, sj_lang_by_block)
summary(english_ability_model2)
## 
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `% speaking english > well`, 
##     data = sj_lang_by_block)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.9364  -2.4342   0.0388   3.0316  12.3011 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 60.84131    1.73337  35.100   <2e-16 ***
## `% speaking english > well`  0.18913    0.01943   9.732   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.277 on 567 degrees of freedom
## Multiple R-squared:  0.1431, Adjusted R-squared:  0.1416 
## F-statistic:  94.7 on 1 and 567 DF,  p-value: < 2.2e-16
sj_lang_by_block %>% 
  ggplot(aes(
  x = `% not speaking spanish`,
  y = `% not completely at home pre shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of individuals not speaking Spanish",
    y = "Percent devices leaving home on weekdays pre shelter-in-place",
    title = "San Jose: Staying at Home and Spanish Language Ability Pre Shelter-in-Place"
  )

spanish_speaking_model2 <- lm(`% not completely at home pre shelter` ~ `% not speaking spanish`, sj_lang_by_block)
summary(spanish_speaking_model2)
## 
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `% not speaking spanish`, 
##     data = sj_lang_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.793  -2.540  -0.002   2.680  11.988 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              71.228200   0.726831  97.998   <2e-16 ***
## `% not speaking spanish`  0.082206   0.009054   9.079   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.318 on 567 degrees of freedom
## Multiple R-squared:  0.1269, Adjusted R-squared:  0.1254 
## F-statistic: 82.43 on 1 and 567 DF,  p-value: < 2.2e-16

Occupants per room

# occupants per room
sj_occupants_per_room_by_block %>% 
  ggplot(aes(
  x = `percent less than 1`,
  y = `% not completely at home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of households with 1 or fewer occupant per room",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and Room Occupancy"
  )

occupants_model <- lm(`% not completely at home` ~ `percent less than 1`, sj_occupants_per_room_by_block)
summary(occupants_model)
## 
## Call:
## lm(formula = `% not completely at home` ~ `percent less than 1`, 
##     data = sj_occupants_per_room_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.996  -4.968  -0.203   3.715  34.804 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           70.74035    2.92106  24.217  < 2e-16 ***
## `percent less than 1` -0.21245    0.03217  -6.604 9.24e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.976 on 566 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.07154,    Adjusted R-squared:  0.0699 
## F-statistic: 43.61 on 1 and 566 DF,  p-value: 9.235e-11
# compare to pre shelter in place
sj_occupants_per_room_by_block %>% 
  ggplot(aes(
  x = `percent less than 1`,
  y = `% not completely at home pre shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of households with 1 or fewer occupant per room",
    y = "Percent devices leaving home on weekdays pre shelter-in-place",
    title = "San Jose: Staying at Home and Room Occupancy Pre Shelter-in-Place"
  )

occupants_model2 <- lm(`% not completely at home pre shelter` ~ `percent less than 1`, sj_occupants_per_room_by_block)
summary(occupants_model2)
## 
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `percent less than 1`, 
##     data = sj_occupants_per_room_by_block)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.3246  -2.6506  -0.2808   2.7536  17.0509 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           62.88485    1.57437  39.943   <2e-16 ***
## `percent less than 1`  0.16329    0.01734   9.418   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.299 on 566 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1355, Adjusted R-squared:  0.134 
## F-statistic:  88.7 on 1 and 566 DF,  p-value: < 2.2e-16

Vehicle ownership

# vehicles
sj_vehicles_by_block %>% 
  ggplot(aes(
  x = `vehicles per capita`,
  y = `% not completely at home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Number of vehicles per capita",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and Vehicles Per Capita"
  )

# vehicles - percent with no vehicles
sj_no_vehicles_by_block %>% 
  ggplot(aes(
  x = `percent with vehicles`,
  y = `% not completely at home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of housholds with vehicles available",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and Vehicle Availability"
  )

vehicles_model <- lm(`% not completely at home` ~ `percent with vehicles`, sj_no_vehicles_by_block)
summary(vehicles_model)
## 
## Call:
## lm(formula = `% not completely at home` ~ `percent with vehicles`, 
##     data = sj_no_vehicles_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.881  -5.081  -0.410   4.723  38.019 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             75.13045    5.09799  14.737  < 2e-16 ***
## `percent with vehicles` -0.24749    0.05345  -4.631 4.53e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.125 on 566 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.0365, Adjusted R-squared:  0.0348 
## F-statistic: 21.44 on 1 and 566 DF,  p-value: 4.528e-06
# compare to pre shelter in place
sj_no_vehicles_by_block %>% 
  ggplot(aes(
  x = `percent with vehicles`,
  y = `% not completely at home pre shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of housholds with vehicles available",
    y = "Percent devices leaving home on weekdays pre shelter-in-place",
    title = "San Jose: Social Distancing and Vehicle Availability Pre Shelter-in-Place"
  )

vehicles_model2 <- lm(`% not completely at home pre shelter` ~ `percent with vehicles`, sj_no_vehicles_by_block)
summary(vehicles_model2)
## 
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `percent with vehicles`, 
##     data = sj_no_vehicles_by_block)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25.5618  -2.9606  -0.0694   3.0006  12.6053 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             63.25942    2.83717  22.297  < 2e-16 ***
## `percent with vehicles`  0.15084    0.02975   5.071 5.37e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.522 on 566 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.04346,    Adjusted R-squared:  0.04177 
## F-statistic: 25.72 on 1 and 566 DF,  p-value: 5.37e-07

Multiple regression analyses

Income, age, language, and occupants per room

# multiple regression 
modeltest <- lm(sj_ami_by_block$`% not completely at home` ~ sj_ami_by_block$`% over 125,000` + sj_age_by_block$`percent less than 30` + sj_lang_by_block$`% speaking english > well` + sj_occupants_per_room_by_block$`percent less than 1`)
summary(modeltest)
## 
## Call:
## lm(formula = sj_ami_by_block$`% not completely at home` ~ sj_ami_by_block$`% over 125,000` + 
##     sj_age_by_block$`percent less than 30` + sj_lang_by_block$`% speaking english > well` + 
##     sj_occupants_per_room_by_block$`percent less than 1`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.8003  -4.4756  -0.7249   3.8851  31.0322 
## 
## Coefficients:
##                                                      Estimate Std. Error
## (Intercept)                                          51.03204    4.71295
## sj_ami_by_block$`% over 125,000`                     -0.23936    0.02125
## sj_age_by_block$`percent less than 30`                0.01300    0.04157
## sj_lang_by_block$`% speaking english > well`          0.13916    0.04554
## sj_occupants_per_room_by_block$`percent less than 1` -0.02718    0.04556
##                                                      t value Pr(>|t|)    
## (Intercept)                                           10.828  < 2e-16 ***
## sj_ami_by_block$`% over 125,000`                     -11.262  < 2e-16 ***
## sj_age_by_block$`percent less than 30`                 0.313  0.75464    
## sj_lang_by_block$`% speaking english > well`           3.056  0.00235 ** 
## sj_occupants_per_room_by_block$`percent less than 1`  -0.597  0.55105    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.206 on 563 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2461, Adjusted R-squared:  0.2408 
## F-statistic: 45.95 on 4 and 563 DF,  p-value: < 2.2e-16

Education and income

educ_income_model <- lm(sj_ami_by_block$`% not completely at home` ~ sj_ami_by_block$`% over 125,000` + sj_education_by_block$`percent associates or higher`)
summary(educ_income_model)
## 
## Call:
## lm(formula = sj_ami_by_block$`% not completely at home` ~ sj_ami_by_block$`% over 125,000` + 
##     sj_education_by_block$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.075  -4.527  -0.998   3.807  31.484 
## 
## Coefficients:
##                                                      Estimate Std. Error
## (Intercept)                                          61.72611    0.81621
## sj_ami_by_block$`% over 125,000`                     -0.14878    0.02229
## sj_education_by_block$`percent associates or higher` -0.08534    0.02191
##                                                      t value Pr(>|t|)    
## (Intercept)                                           75.625  < 2e-16 ***
## sj_ami_by_block$`% over 125,000`                      -6.675 5.93e-11 ***
## sj_education_by_block$`percent associates or higher`  -3.896  0.00011 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.164 on 565 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2524, Adjusted R-squared:  0.2497 
## F-statistic: 95.37 on 2 and 565 DF,  p-value: < 2.2e-16

Internet and income

educ_income_model <- lm(sj_ami_by_block$`% not completely at home` ~ sj_ami_by_block$`% over 125,000` + sj_internet_by_block$`percent high speed`)
summary(educ_income_model)
## 
## Call:
## lm(formula = sj_ami_by_block$`% not completely at home` ~ sj_ami_by_block$`% over 125,000` + 
##     sj_internet_by_block$`percent high speed`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.5064  -4.4482  -0.7029   3.8469  30.8939 
## 
## Coefficients:
##                                           Estimate Std. Error t value Pr(>|t|)
## (Intercept)                               65.44722    2.26128  28.943  < 2e-16
## sj_ami_by_block$`% over 125,000`          -0.17651    0.02101  -8.400 3.61e-16
## sj_internet_by_block$`percent high speed` -0.08192    0.03352  -2.444   0.0148
##                                              
## (Intercept)                               ***
## sj_ami_by_block$`% over 125,000`          ***
## sj_internet_by_block$`percent high speed` *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.221 on 565 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2403, Adjusted R-squared:  0.2377 
## F-statistic: 89.38 on 2 and 565 DF,  p-value: < 2.2e-16

Income and Spanish language ability

income_spanish_model <- lm(sj_ami_by_block$`% not completely at home` ~ sj_ami_by_block$`% over 125,000` + sj_lang_by_block$`% not speaking spanish`)
summary(income_spanish_model)
## 
## Call:
## lm(formula = sj_ami_by_block$`% not completely at home` ~ sj_ami_by_block$`% over 125,000` + 
##     sj_lang_by_block$`% not speaking spanish`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.5661  -4.4969  -0.7662   4.0045  30.8559 
## 
## Coefficients:
##                                           Estimate Std. Error t value Pr(>|t|)
## (Intercept)                               62.74652    1.22089  51.394   <2e-16
## sj_ami_by_block$`% over 125,000`          -0.17711    0.02040  -8.680   <2e-16
## sj_lang_by_block$`% not speaking spanish` -0.04992    0.01937  -2.577   0.0102
##                                              
## (Intercept)                               ***
## sj_ami_by_block$`% over 125,000`          ***
## sj_lang_by_block$`% not speaking spanish` *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.217 on 565 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2412, Adjusted R-squared:  0.2385 
## F-statistic: 89.81 on 2 and 565 DF,  p-value: < 2.2e-16

This suggests that once controlling for income, Spanish language ability is no longer a strong predictor of leaving home during the shelter-in-place order.

Correlations with increase in staying home

Now I consider looking at correlations with the change in percent of devices staying completely at home since shelter-in-place started relative to the pre-shelter-in-place levels. I plot the change in percentage staying completely at home, and show linear fitting models for the change in percent staying at home, as well as the fractional increase in percent staying home.

# collect the demographic variables
sj_dem_distancing <- sj_internet_by_block %>% 
  dplyr::select(`percent high speed`, `% not completely at home`, `% Completely at Home`, blockgroup) %>% 
  left_join(sj_education_by_block %>% dplyr::select(blockgroup, `percent associates or higher`)) %>% 
  left_join(sj_ami_by_block %>% dplyr::select(blockgroup, `% over 125,000`)) %>% 
  left_join(sj_ami_by_block %>% dplyr::select(blockgroup, `% over 100,000`)) %>% 
  left_join(sj_ami_by_block %>% dplyr::select(blockgroup, `% over 75,000`)) %>% 
  left_join(sj_age_by_block %>% dplyr::select(blockgroup, `percent less than 30`)) %>% 
  left_join(sj_age_by_block %>% dplyr::select(blockgroup, `percent elderly`)) %>% 
  left_join(sj_lang_by_block %>% dplyr::select(blockgroup, `% not speaking spanish`)) %>% 
  left_join(sj_lang_by_block %>% dplyr::select(blockgroup, `% speaking english > well`)) %>% 
  left_join(sj_no_vehicles_by_block %>% dplyr::select(blockgroup, `percent with vehicles`)) %>%
  left_join(sj_occupants_per_room_by_block %>% dplyr::select(blockgroup, `percent less than 1`)) %>% 
  left_join(sj_sex_workers_by_block %>% dplyr::select(blockgroup, `% male workers`)) %>%
  left_join(sj_race_by_block %>% dplyr::select(blockgroup, `% white`, `% Asian`, `% non hispanic/latino`)) %>%
  left_join(sj_age_by_block %>% dplyr::select(blockgroup, `percent less than 18`)) %>%
  left_join(sj_age_by_block %>% dplyr::select(blockgroup, `percent 20-29`))

sj_dem_distancing_pre_post <- sj_dem_distancing %>% 
  left_join(sj_internet_by_block %>% dplyr::select(`% not completely at home pre shelter`, `% Completely at Home Pre Shelter`, blockgroup)) %>% 
  mutate(`% increase in staying completely home` = `% Completely at Home` - `% Completely at Home Pre Shelter`, frac_increase = `% increase in staying completely home`/`% Completely at Home Pre Shelter`)

sj_dem_distancing[is.na(sj_dem_distancing)] <- 0
sj_dem_distancing_pre_post[is.na(sj_dem_distancing_pre_post)] <- 0

saveRDS(sj_dem_distancing_pre_post, "/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/sj_socialdistancing_demdata_prepostdifs_manyvars.rds")

# sj_dem_distancing_pre_post <- readRDS("/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/sj_socialdistancing_demdata_prepostdifs_manyvars.rds")

Age

# age
sj_dem_distancing_pre_post %>%
  ggplot(aes(
  x = `percent less than 30`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
labs(
    x = "Percent of residents younger than 30",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "San Jose: Social Distancing and Young Age Groups"
  )

young_model_dif <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`percent less than 30`)
summary(young_model_dif)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     sj_dem_distancing_pre_post$`percent less than 30`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -36.880  -5.336  -0.078   5.582  30.582 
## 
## Coefficients:
##                                                   Estimate Std. Error t value
## (Intercept)                                       37.08587    1.71572  21.615
## sj_dem_distancing_pre_post$`percent less than 30` -0.28907    0.04349  -6.647
##                                                   Pr(>|t|)    
## (Intercept)                                        < 2e-16 ***
## sj_dem_distancing_pre_post$`percent less than 30` 7.05e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.408 on 567 degrees of freedom
## Multiple R-squared:  0.07228,    Adjusted R-squared:  0.07065 
## F-statistic: 44.18 on 1 and 567 DF,  p-value: 7.055e-11
young_model_frac <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`percent less than 30`)
summary(young_model_frac)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`percent less than 30`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.05070 -0.39278 -0.09951  0.29752  2.94402 
## 
## Coefficients:
##                                                    Estimate Std. Error t value
## (Intercept)                                        2.090437   0.118151  17.693
## sj_dem_distancing_pre_post$`percent less than 30` -0.021512   0.002995  -7.183
##                                                   Pr(>|t|)    
## (Intercept)                                        < 2e-16 ***
## sj_dem_distancing_pre_post$`percent less than 30` 2.16e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6478 on 567 degrees of freedom
## Multiple R-squared:  0.0834, Adjusted R-squared:  0.08178 
## F-statistic: 51.59 on 1 and 567 DF,  p-value: 2.159e-12
sj_dem_distancing_pre_post %>% filter(`percent elderly` < 50) %>% # get rid of extreme outliers
  ggplot(aes(
  x = `percent elderly`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents 65 and older",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "San Jose: Social Distancing and Elderly Population"
  )

elderly_model_dif <- lm(`% increase in staying completely home` ~ `percent elderly`, sj_dem_distancing_pre_post %>% filter(`percent elderly` < 50))
summary(elderly_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent elderly`, 
##     data = sj_dem_distancing_pre_post %>% filter(`percent elderly` < 
##         50))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.148  -5.695  -0.102   5.905  31.360 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       22.05114    0.89393   24.67  < 2e-16 ***
## `percent elderly`  0.30740    0.06172    4.98 8.45e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.545 on 564 degrees of freedom
## Multiple R-squared:  0.04212,    Adjusted R-squared:  0.04043 
## F-statistic:  24.8 on 1 and 564 DF,  p-value: 8.453e-07
elderly_model_frac <- lm(frac_increase ~ `percent elderly`, sj_dem_distancing_pre_post %>% filter(`percent elderly` < 50))
summary(elderly_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `percent elderly`, data = sj_dem_distancing_pre_post %>% 
##     filter(`percent elderly` < 50))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.61924 -0.42393 -0.09733  0.32897  2.99134 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       0.943831   0.061542  15.336  < 2e-16 ***
## `percent elderly` 0.024971   0.004249   5.876 7.18e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6572 on 564 degrees of freedom
## Multiple R-squared:  0.05769,    Adjusted R-squared:  0.05602 
## F-statistic: 34.53 on 1 and 564 DF,  p-value: 7.18e-09
sj_dem_distancing_pre_post %>%
  ggplot(aes(
  x = `percent less than 18`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
labs(
    x = "Percent of residents younger than 18",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "San Jose: Social Distancing and Child Population"
  )

child_model_dif <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`percent less than 18`)
summary(child_model_dif)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     sj_dem_distancing_pre_post$`percent less than 18`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.720  -6.175  -0.157   6.296  30.276 
## 
## Coefficients:
##                                                   Estimate Std. Error t value
## (Intercept)                                       21.53928    1.40628  15.317
## sj_dem_distancing_pre_post$`percent less than 18`  0.19796    0.05992   3.304
##                                                   Pr(>|t|)    
## (Intercept)                                        < 2e-16 ***
## sj_dem_distancing_pre_post$`percent less than 18`  0.00102 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.674 on 567 degrees of freedom
## Multiple R-squared:  0.01888,    Adjusted R-squared:  0.01715 
## F-statistic: 10.91 on 1 and 567 DF,  p-value: 0.001015
child_model_frac <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`percent less than 18`)
summary(child_model_frac)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`percent less than 18`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6794 -0.4707 -0.1020  0.3291  2.7768 
## 
## Coefficients:
##                                                   Estimate Std. Error t value
## (Intercept)                                       0.947870   0.097375   9.734
## sj_dem_distancing_pre_post$`percent less than 18` 0.014092   0.004149   3.396
##                                                   Pr(>|t|)    
## (Intercept)                                        < 2e-16 ***
## sj_dem_distancing_pre_post$`percent less than 18` 0.000731 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6699 on 567 degrees of freedom
## Multiple R-squared:  0.01994,    Adjusted R-squared:  0.01821 
## F-statistic: 11.53 on 1 and 567 DF,  p-value: 0.0007311
sj_dem_distancing_pre_post %>%
  ggplot(aes(
  x = `percent 20-29`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
labs(
    x = "Percent of residents ages 20-29",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "San Jose: Social Distancing and Young Adult Population"
  )

young_adult_model_dif <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`percent 20-29`)
summary(young_adult_model_dif)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     sj_dem_distancing_pre_post$`percent 20-29`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.351  -5.228  -0.144   5.505  30.485 
## 
## Coefficients:
##                                            Estimate Std. Error t value Pr(>|t|)
## (Intercept)                                32.15132    0.75084   42.82   <2e-16
## sj_dem_distancing_pre_post$`percent 20-29` -0.44465    0.04671   -9.52   <2e-16
##                                               
## (Intercept)                                ***
## sj_dem_distancing_pre_post$`percent 20-29` ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.069 on 567 degrees of freedom
## Multiple R-squared:  0.1378, Adjusted R-squared:  0.1363 
## F-statistic: 90.63 on 1 and 567 DF,  p-value: < 2.2e-16
young_adult_model_frac <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`percent 20-29`)
summary(young_adult_model_frac)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`percent 20-29`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.73360 -0.39355 -0.08914  0.29531  2.68602 
## 
## Coefficients:
##                                             Estimate Std. Error t value
## (Intercept)                                 1.726797   0.051299   33.66
## sj_dem_distancing_pre_post$`percent 20-29` -0.033348   0.003191  -10.45
##                                            Pr(>|t|)    
## (Intercept)                                  <2e-16 ***
## sj_dem_distancing_pre_post$`percent 20-29`   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6196 on 567 degrees of freedom
## Multiple R-squared:  0.1615, Adjusted R-squared:   0.16 
## F-statistic: 109.2 on 1 and 567 DF,  p-value: < 2.2e-16

Looking at all age categories present

Here I look at each age bracket individually and see the effect size.

sj_all_age_by_block <- sj_all_age_by_block %>% 
  left_join(sj_dem_distancing_pre_post %>% dplyr::select(blockgroup, `% increase in staying completely home`)) %>%
  mutate(`% 80 and older` = (`80 to 84 years` + `85 years and over`)*100/total)

for (i in 2:(ncol(sj_all_age_by_block)-3)) {
  colName <- colnames(sj_all_age_by_block)[i]
  columnToUse <- sj_all_age_by_block %>% dplyr::select(blockgroup, colName, total)
  percent_vals <- (columnToUse[,2]*100)/columnToUse$total
  print(colName)
  age_bracket_model <- lm(sj_all_age_by_block$`% increase in staying completely home` ~ percent_vals[,1])
  print(summary(age_bracket_model))
}
## [1] "10 to 14 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.991  -6.131  -0.176   6.456  30.687 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        22.5483     0.8633  26.120  < 2e-16 ***
## percent_vals[, 1]   0.5327     0.1183   4.503 8.14e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.597 on 567 degrees of freedom
## Multiple R-squared:  0.03452,    Adjusted R-squared:  0.03282 
## F-statistic: 20.28 on 1 and 567 DF,  p-value: 8.145e-06
## 
## [1] "15 to 17 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.509  -5.778  -0.124   6.033  29.322 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        23.8361     0.7473  31.897  < 2e-16 ***
## percent_vals[, 1]   0.5763     0.1682   3.427 0.000655 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.668 on 567 degrees of freedom
## Multiple R-squared:  0.02029,    Adjusted R-squared:  0.01856 
## F-statistic: 11.74 on 1 and 567 DF,  p-value: 0.0006552
## 
## [1] "18 and 19 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -42.044  -5.924   0.047   5.460  31.821 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        27.0441     0.5204  51.972  < 2e-16 ***
## percent_vals[, 1]  -0.5128     0.1581  -3.243  0.00125 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.678 on 567 degrees of freedom
## Multiple R-squared:  0.01821,    Adjusted R-squared:  0.01648 
## F-statistic: 10.52 on 1 and 567 DF,  p-value: 0.001254
## 
## [1] "20 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -42.061  -5.961  -0.373   6.088  30.039 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        27.0614     0.4984   54.30  < 2e-16 ***
## percent_vals[, 1]  -0.8305     0.2251   -3.69 0.000246 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.652 on 567 degrees of freedom
## Multiple R-squared:  0.02345,    Adjusted R-squared:  0.02173 
## F-statistic: 13.62 on 1 and 567 DF,  p-value: 0.0002458
## 
## [1] "21 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.708  -6.011  -0.137   5.863  31.476 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        27.7371     0.4927  56.297  < 2e-16 ***
## percent_vals[, 1]  -1.4197     0.2367  -5.998 3.56e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.471 on 567 degrees of freedom
## Multiple R-squared:  0.05966,    Adjusted R-squared:  0.058 
## F-statistic: 35.97 on 1 and 567 DF,  p-value: 3.563e-09
## 
## [1] "22 to 24 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.246  -5.779  -0.131   5.629  30.419 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        29.4302     0.6325  46.530  < 2e-16 ***
## percent_vals[, 1]  -0.8693     0.1251  -6.947 1.02e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.376 on 567 degrees of freedom
## Multiple R-squared:  0.07845,    Adjusted R-squared:  0.07682 
## F-statistic: 48.27 on 1 and 567 DF,  p-value: 1.024e-11
## 
## [1] "25 to 29 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -42.409  -5.249  -0.184   5.874  34.126 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       29.61690    0.66939  44.245  < 2e-16 ***
## percent_vals[, 1] -0.49201    0.07335  -6.708 4.79e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.401 on 567 degrees of freedom
## Multiple R-squared:  0.07352,    Adjusted R-squared:  0.07189 
## F-statistic: 44.99 on 1 and 567 DF,  p-value: 4.791e-11
## 
## [1] "30 to 34 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -41.177  -5.817   0.035   6.345  28.391 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       28.70902    0.77896  36.856  < 2e-16 ***
## percent_vals[, 1] -0.37113    0.09086  -4.085 5.05e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.627 on 567 degrees of freedom
## Multiple R-squared:  0.02859,    Adjusted R-squared:  0.02687 
## F-statistic: 16.68 on 1 and 567 DF,  p-value: 5.049e-05
## 
## [1] "35 to 39 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.156  -5.943  -0.257   5.907  30.526 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        27.1343     0.9485   28.61   <2e-16 ***
## percent_vals[, 1]  -0.1585     0.1183   -1.34    0.181    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.752 on 567 degrees of freedom
## Multiple R-squared:  0.003157,   Adjusted R-squared:  0.001399 
## F-statistic: 1.796 on 1 and 567 DF,  p-value: 0.1808
## 
## [1] "40 to 44 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.906  -5.923  -0.349   5.971  29.308 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        24.0551     1.0005  24.043   <2e-16 ***
## percent_vals[, 1]   0.2678     0.1266   2.115   0.0349 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.729 on 567 degrees of freedom
## Multiple R-squared:  0.007829,   Adjusted R-squared:  0.006079 
## F-statistic: 4.474 on 1 and 567 DF,  p-value: 0.03485
## 
## [1] "45 to 49 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.832  -6.050  -0.073   5.805  28.657 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        22.3496     0.9311  24.003  < 2e-16 ***
## percent_vals[, 1]   0.5039     0.1163   4.334 1.74e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.609 on 567 degrees of freedom
## Multiple R-squared:  0.03206,    Adjusted R-squared:  0.03035 
## F-statistic: 18.78 on 1 and 567 DF,  p-value: 1.736e-05
## 
## [1] "5 to 9 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -41.114  -6.092   0.149   6.025  30.546 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        24.1255     0.8951  26.952   <2e-16 ***
## percent_vals[, 1]   0.3035     0.1299   2.336   0.0198 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.72 on 567 degrees of freedom
## Multiple R-squared:  0.009535,   Adjusted R-squared:  0.007788 
## F-statistic: 5.458 on 1 and 567 DF,  p-value: 0.01982
## 
## [1] "50 to 54 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -45.950  -5.335  -0.428   6.052  27.436 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        22.2547     0.8728  25.499  < 2e-16 ***
## percent_vals[, 1]   0.5206     0.1081   4.816 1.88e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.573 on 567 degrees of freedom
## Multiple R-squared:  0.03931,    Adjusted R-squared:  0.03761 
## F-statistic:  23.2 on 1 and 567 DF,  p-value: 1.878e-06
## 
## [1] "55 to 59 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.313  -5.949   0.018   5.624  31.236 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        24.4765     0.8320  29.418   <2e-16 ***
## percent_vals[, 1]   0.2330     0.1118   2.084   0.0376 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.73 on 567 degrees of freedom
## Multiple R-squared:  0.007599,   Adjusted R-squared:  0.005849 
## F-statistic: 4.342 on 1 and 567 DF,  p-value: 0.03763
## 
## [1] "60 and 61 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.170  -6.215  -0.165   5.853  31.591 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        25.1702     0.6834  36.831   <2e-16 ***
## percent_vals[, 1]   0.3311     0.2219   1.492    0.136    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.748 on 567 degrees of freedom
## Multiple R-squared:  0.003911,   Adjusted R-squared:  0.002155 
## F-statistic: 2.227 on 1 and 567 DF,  p-value: 0.1362
## 
## [1] "62 to 64 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.702  -6.044   0.136   5.783  32.507 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        24.2393     0.7313  33.143  < 2e-16 ***
## percent_vals[, 1]   0.5432     0.1889   2.876  0.00418 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.697 on 567 degrees of freedom
## Multiple R-squared:  0.01437,    Adjusted R-squared:  0.01264 
## F-statistic: 8.269 on 1 and 567 DF,  p-value: 0.004184
## 
## [1] "65 and 66 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.960  -6.221  -0.152   5.954  31.554 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        25.5464     0.6608   38.66   <2e-16 ***
## percent_vals[, 1]   0.2301     0.2707    0.85    0.396    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.761 on 567 degrees of freedom
## Multiple R-squared:  0.001273,   Adjusted R-squared:  -0.0004883 
## F-statistic: 0.7228 on 1 and 567 DF,  p-value: 0.3956
## 
## [1] "67 to 69 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.865  -5.815  -0.072   5.929  32.152 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        24.5725     0.6431   38.21  < 2e-16 ***
## percent_vals[, 1]   0.5761     0.2029    2.84  0.00468 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.698 on 567 degrees of freedom
## Multiple R-squared:  0.01402,    Adjusted R-squared:  0.01228 
## F-statistic: 8.064 on 1 and 567 DF,  p-value: 0.004676
## 
## [1] "70 to 74 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.882  -6.028  -0.259   6.076  31.462 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        25.2143     0.6551   38.49   <2e-16 ***
## percent_vals[, 1]   0.2398     0.1588    1.51    0.132    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.748 on 567 degrees of freedom
## Multiple R-squared:  0.004006,   Adjusted R-squared:  0.00225 
## F-statistic: 2.281 on 1 and 567 DF,  p-value: 0.1316
## 
## [1] "75 to 79 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.117  -6.017  -0.038   6.083  30.855 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        25.1171     0.5621  44.686   <2e-16 ***
## percent_vals[, 1]   0.3566     0.1585   2.249   0.0249 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.724 on 567 degrees of freedom
## Multiple R-squared:  0.008843,   Adjusted R-squared:  0.007095 
## F-statistic: 5.059 on 1 and 567 DF,  p-value: 0.02489
## 
## [1] "80 to 84 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.303  -5.709   0.145   6.054  28.797 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        24.3029     0.5441  44.668  < 2e-16 ***
## percent_vals[, 1]   1.0596     0.2306   4.595 5.33e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.59 on 567 degrees of freedom
## Multiple R-squared:  0.03591,    Adjusted R-squared:  0.03421 
## F-statistic: 21.12 on 1 and 567 DF,  p-value: 5.329e-06
## 
## [1] "85 years and over"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.893  -6.254  -0.193   6.058  31.158 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       25.89334    0.52645  49.185   <2e-16 ***
## percent_vals[, 1]  0.05754    0.20218   0.285    0.776    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.766 on 567 degrees of freedom
## Multiple R-squared:  0.0001428,  Adjusted R-squared:  -0.001621 
## F-statistic: 0.081 on 1 and 567 DF,  p-value: 0.7761
## 
## [1] "Under 5 years"
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     percent_vals[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -42.308  -5.919   0.093   6.349  29.612 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        27.8918     0.8556   32.60   <2e-16 ***
## percent_vals[, 1]  -0.3098     0.1224   -2.53   0.0117 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.712 on 567 degrees of freedom
## Multiple R-squared:  0.01117,    Adjusted R-squared:  0.009423 
## F-statistic: 6.403 on 1 and 567 DF,  p-value: 0.01166
summary(lm(sj_all_age_by_block$`% increase in staying completely home` ~ sj_all_age_by_block$`% 80 and older`))
## 
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~ 
##     sj_all_age_by_block$`% 80 and older`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.889  -5.998  -0.020   5.952  30.026 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           24.8892     0.5791  42.982   <2e-16 ***
## sj_all_age_by_block$`% 80 and older`   0.3404     0.1277   2.666   0.0079 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.706 on 567 degrees of freedom
## Multiple R-squared:  0.01238,    Adjusted R-squared:  0.01064 
## F-statistic: 7.107 on 1 and 567 DF,  p-value: 0.007896

Income

# income - less than $75000
sj_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% over 75,000`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $75,000 (50% AMI) annually",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "San Jose: Social Distancing and Households Above 50% AMI"
  )

income_75_model_dif <- lm(`% increase in staying completely home` ~ `% over 75,000`, sj_dem_distancing_pre_post)
summary(income_75_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% over 75,000`, 
##     data = sj_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.177  -4.215   0.654   4.836  24.392 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      7.87685    1.21040   6.508 1.68e-10 ***
## `% over 75,000`  0.29131    0.01867  15.600  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.17 on 567 degrees of freedom
## Multiple R-squared:  0.3003, Adjusted R-squared:  0.2991 
## F-statistic: 243.4 on 1 and 567 DF,  p-value: < 2.2e-16
income_75_model_frac <- lm(frac_increase ~ `% over 75,000`, sj_dem_distancing_pre_post)
summary(income_75_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% over 75,000`, data = sj_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7826 -0.3206 -0.0471  0.2779  2.4634 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.049686   0.084975   0.585    0.559    
## `% over 75,000` 0.019541   0.001311  14.906   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5736 on 567 degrees of freedom
## Multiple R-squared:  0.2815, Adjusted R-squared:  0.2803 
## F-statistic: 222.2 on 1 and 567 DF,  p-value: < 2.2e-16
# income - less than $100000
sj_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% over 100,000`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $100,000 (80% AMI) annually",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "San Jose: Social Distancing and Households Below 80% AMI"
  )

income_100_model_dif <- lm(`% increase in staying completely home` ~ `% over 100,000`, sj_dem_distancing_pre_post)
summary(income_100_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% over 100,000`, 
##     data = sj_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.092  -4.499   0.741   5.093  21.095 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      11.11435    0.93500   11.89   <2e-16 ***
## `% over 100,000`  0.29201    0.01715   17.02   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.945 on 567 degrees of freedom
## Multiple R-squared:  0.3382, Adjusted R-squared:  0.3371 
## F-statistic: 289.8 on 1 and 567 DF,  p-value: < 2.2e-16
income_100_model_frac <- lm(frac_increase ~ `% over 100,000`, sj_dem_distancing_pre_post)
summary(income_100_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% over 100,000`, data = sj_dem_distancing_pre_post)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.88202 -0.32889 -0.01929  0.27585  2.61194 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.244481   0.065108   3.755 0.000191 ***
## `% over 100,000` 0.020027   0.001194  16.767  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5533 on 567 degrees of freedom
## Multiple R-squared:  0.3315, Adjusted R-squared:  0.3303 
## F-statistic: 281.1 on 1 and 567 DF,  p-value: < 2.2e-16
# income - less than $125000
sj_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% over 125,000`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $125,000 annually",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "San Jose: Social Distancing and Households Below $125,000"
  )

income_125_model_dif <- lm(`% increase in staying completely home` ~ `% over 125,000`, sj_dem_distancing_pre_post)
summary(income_125_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% over 125,000`, 
##     data = sj_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.959  -3.904   0.736   5.089  21.703 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      13.26968    0.77542   17.11   <2e-16 ***
## `% over 125,000`  0.30968    0.01713   18.08   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.779 on 567 degrees of freedom
## Multiple R-squared:  0.3656, Adjusted R-squared:  0.3645 
## F-statistic: 326.8 on 1 and 567 DF,  p-value: < 2.2e-16
income_125_model_frac <- lm(frac_increase ~ `% over 125,000`, sj_dem_distancing_pre_post)
summary(income_125_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% over 125,000`, data = sj_dem_distancing_pre_post)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.96210 -0.29148 -0.01108  0.26399  2.49235 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.365429   0.053078   6.885 1.54e-11 ***
## `% over 125,000` 0.021893   0.001173  18.671  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5325 on 567 degrees of freedom
## Multiple R-squared:  0.3807, Adjusted R-squared:  0.3796 
## F-statistic: 348.6 on 1 and 567 DF,  p-value: < 2.2e-16

Language

# language
sj_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% speaking english > well`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of individuals speaking English well",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "San Jose: Social Distancing and English Language Ability"
  )

english_ability_model_dif <- lm(`% increase in staying completely home` ~ `% speaking english > well`, sj_dem_distancing_pre_post)
summary(english_ability_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% speaking english > well`, 
##     data = sj_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.517  -4.925   0.439   5.292  29.880 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 -6.43162    3.71381  -1.732   0.0839 .  
## `% speaking english > well`  0.36545    0.04164   8.776   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.164 on 567 degrees of freedom
## Multiple R-squared:  0.1196, Adjusted R-squared:  0.118 
## F-statistic: 77.03 on 1 and 567 DF,  p-value: < 2.2e-16
english_ability_model_frac <- lm(frac_increase ~ `% speaking english > well`, sj_dem_distancing_pre_post)
summary(english_ability_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% speaking english > well`, data = sj_dem_distancing_pre_post)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.56357 -0.35298 -0.02831  0.27952  2.72152 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 -1.397386   0.250121  -5.587 3.59e-08 ***
## `% speaking english > well`  0.030007   0.002804  10.700  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6172 on 567 degrees of freedom
## Multiple R-squared:  0.168,  Adjusted R-squared:  0.1665 
## F-statistic: 114.5 on 1 and 567 DF,  p-value: < 2.2e-16
sj_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% not speaking spanish`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of individuals not speaking Spanish",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "San Jose: Social Distancing and Spanish Language Ability"
  )

spanish_speaking_model_dif <- lm(`% increase in staying completely home` ~ `% not speaking spanish`, sj_dem_distancing_pre_post)
summary(spanish_speaking_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% not speaking spanish`, 
##     data = sj_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.458  -3.785   0.667   5.239  25.779 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               7.35394    1.43198   5.136 3.87e-07 ***
## `% not speaking spanish`  0.23968    0.01784  13.436  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.506 on 567 degrees of freedom
## Multiple R-squared:  0.2415, Adjusted R-squared:  0.2402 
## F-statistic: 180.5 on 1 and 567 DF,  p-value: < 2.2e-16
spanish_speaking_model_frac <- lm(frac_increase ~ `% not speaking spanish`, sj_dem_distancing_pre_post)
summary(spanish_speaking_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% not speaking spanish`, data = sj_dem_distancing_pre_post)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.66195 -0.33660 -0.02917  0.26549  2.54504 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -0.04657    0.09871  -0.472    0.637    
## `% not speaking spanish`  0.01686    0.00123  13.715   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5864 on 567 degrees of freedom
## Multiple R-squared:  0.2491, Adjusted R-squared:  0.2478 
## F-statistic: 188.1 on 1 and 567 DF,  p-value: < 2.2e-16

Occupants per room

# occupants per room
sj_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `percent less than 1`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of households with 1 or fewer occupant per room",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "San Jose: Social Distancing and Room Occupancy"
  )

occupants_model_dif <- lm(`% increase in staying completely home` ~ `percent less than 1`, sj_dem_distancing_pre_post)
summary(occupants_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent less than 1`, 
##     data = sj_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.716  -4.963   0.431   5.163  27.440 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -7.23400    3.05051  -2.371   0.0181 *  
## `percent less than 1`  0.36894    0.03362  10.972   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.871 on 567 degrees of freedom
## Multiple R-squared:  0.1751, Adjusted R-squared:  0.1737 
## F-statistic: 120.4 on 1 and 567 DF,  p-value: < 2.2e-16
occupants_model_frac <- lm(frac_increase ~ `percent less than 1`, sj_dem_distancing_pre_post)
summary(occupants_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `percent less than 1`, data = sj_dem_distancing_pre_post)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.60393 -0.37465 -0.06035  0.26533  2.65250 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -1.158279   0.208900  -5.545 4.52e-08 ***
## `percent less than 1`  0.026907   0.002303  11.685  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6075 on 567 degrees of freedom
## Multiple R-squared:  0.1941, Adjusted R-squared:  0.1927 
## F-statistic: 136.5 on 1 and 567 DF,  p-value: < 2.2e-16

Vehicle ownership

# vehicles - percent with no vehicles
sj_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `percent with vehicles`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of housholds with vehicles available",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "San Jose: Social Distancing and Vehicle Availability"
  )

vehicles_model_dif <- lm(`% increase in staying completely home` ~ `percent with vehicles`, sj_dem_distancing_pre_post)
summary(vehicles_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent with vehicles`, 
##     data = sj_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.781  -5.993  -0.092   5.493  30.166 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -9.2523     4.9742  -1.860   0.0634 .  
## `percent with vehicles`   0.3709     0.0522   7.107 3.59e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.359 on 567 degrees of freedom
## Multiple R-squared:  0.08179,    Adjusted R-squared:  0.08017 
## F-statistic: 50.51 on 1 and 567 DF,  p-value: 3.586e-12
vehicles_model_frac <- lm(frac_increase ~ `percent with vehicles`, sj_dem_distancing_pre_post)
summary(vehicles_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `percent with vehicles`, data = sj_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7565 -0.4311 -0.1077  0.2971  2.9043 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -0.972802   0.347061  -2.803  0.00524 ** 
## `percent with vehicles`  0.023551   0.003642   6.467 2.16e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.653 on 567 degrees of freedom
## Multiple R-squared:  0.06869,    Adjusted R-squared:  0.06704 
## F-statistic: 41.82 on 1 and 567 DF,  p-value: 2.165e-10

Education

sj_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `percent associates or higher`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of people with an degree at Associate's level or higher",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "San Jose: Social Distancing and Education"
  )

educ_model_dif <- lm(`% increase in staying completely home` ~ `percent associates or higher`, sj_dem_distancing_pre_post)
summary(educ_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent associates or higher`, 
##     data = sj_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.077  -3.136   1.162   5.123  22.495 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    12.87972    0.90096   14.29   <2e-16 ***
## `percent associates or higher`  0.27797    0.01768   15.72   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.151 on 567 degrees of freedom
## Multiple R-squared:  0.3036, Adjusted R-squared:  0.3024 
## F-statistic: 247.2 on 1 and 567 DF,  p-value: < 2.2e-16
educ_model_frac <- lm(frac_increase ~ `percent associates or higher`, sj_dem_distancing_pre_post)
summary(educ_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `percent associates or higher`, 
##     data = sj_dem_distancing_pre_post)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.25246 -0.30929  0.00784  0.28169  2.47226 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.34752    0.06215   5.592  3.5e-08 ***
## `percent associates or higher`  0.01945    0.00122  15.946  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5622 on 567 degrees of freedom
## Multiple R-squared:  0.3096, Adjusted R-squared:  0.3084 
## F-statistic: 254.3 on 1 and 567 DF,  p-value: < 2.2e-16

Internet

sj_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `percent high speed`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of households with broadband such as cable, fiber optic or DSL",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "San Jose: Social Distancing and High Speed Internet"
  )

internet_model_dif <- lm(`% increase in staying completely home` ~ `percent high speed`, sj_dem_distancing_pre_post)
summary(internet_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent high speed`, 
##     data = sj_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.225  -5.235   0.465   5.284  26.928 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -3.75054    2.38634  -1.572    0.117    
## `percent high speed`  0.36925    0.02929  12.608   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.632 on 567 degrees of freedom
## Multiple R-squared:  0.219,  Adjusted R-squared:  0.2176 
## F-statistic:   159 on 1 and 567 DF,  p-value: < 2.2e-16
internet_model_frac <- lm(frac_increase ~ `percent high speed`, sj_dem_distancing_pre_post)
summary(internet_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `percent high speed`, data = sj_dem_distancing_pre_post)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.69491 -0.34717 -0.06543  0.26220  2.63808 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -0.68066    0.16782  -4.056  5.7e-05 ***
## `percent high speed`  0.02415    0.00206  11.726  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6071 on 567 degrees of freedom
## Multiple R-squared:  0.1952, Adjusted R-squared:  0.1938 
## F-statistic: 137.5 on 1 and 567 DF,  p-value: < 2.2e-16

Sex of workers

sj_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% male workers`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of workers that are male",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "San Jose: Social Distancing and Sex of Workers"
  )

sex_workers_model_dif <- lm(`% increase in staying completely home` ~ `% male workers`, sj_dem_distancing_pre_post)
summary(sex_workers_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% male workers`, 
##     data = sj_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -41.179  -6.300  -0.149   5.945  31.056 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      23.54532    6.34967   3.708 0.000229 ***
## `% male workers`  0.04564    0.11842   0.385 0.700071    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.766 on 567 degrees of freedom
## Multiple R-squared:  0.0002619,  Adjusted R-squared:  -0.001501 
## F-statistic: 0.1485 on 1 and 567 DF,  p-value: 0.7001
sex_workers_model_frac <- lm(frac_increase ~ `% male workers`, sj_dem_distancing_pre_post)
summary(sex_workers_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% male workers`, data = sj_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8146 -0.4471 -0.1020  0.3269  2.8506 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)      0.690056   0.439299   1.571    0.117
## `% male workers` 0.010736   0.008193   1.310    0.191
## 
## Residual standard error: 0.6756 on 567 degrees of freedom
## Multiple R-squared:  0.003019,   Adjusted R-squared:  0.001261 
## F-statistic: 1.717 on 1 and 567 DF,  p-value: 0.1906

Race

# white
sj_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% white`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents that are white",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "San Jose: Social Distancing and White Residents"
  )

white_model_dif <- lm(`% increase in staying completely home` ~ `% white`, sj_dem_distancing_pre_post)
summary(white_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% white`, 
##     data = sj_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.929  -5.776  -0.198   5.860  32.009 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 24.56566    0.90752  27.069   <2e-16 ***
## `% white`    0.03302    0.01882   1.754   0.0799 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.741 on 567 degrees of freedom
## Multiple R-squared:  0.005399,   Adjusted R-squared:  0.003645 
## F-statistic: 3.078 on 1 and 567 DF,  p-value: 0.0799
white_model_frac <- lm(frac_increase ~ `% white`, sj_dem_distancing_pre_post)
summary(white_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% white`, data = sj_dem_distancing_pre_post)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.75838 -0.42186 -0.07865  0.30372  2.91403 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.991219   0.061719  16.060  < 2e-16 ***
## `% white`   0.006348   0.001280   4.959  9.4e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6625 on 567 degrees of freedom
## Multiple R-squared:  0.04156,    Adjusted R-squared:  0.03987 
## F-statistic: 24.59 on 1 and 567 DF,  p-value: 9.398e-07
# asian
sj_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% Asian`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents that are Asian",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "San Jose: Social Distancing and Asian Residents"
  )

asian_model_dif <- lm(`% increase in staying completely home` ~ `% Asian`, sj_dem_distancing_pre_post)
summary(asian_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% Asian`, 
##     data = sj_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.882  -5.736  -0.167   5.824  25.556 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 21.54149    0.70434  30.584  < 2e-16 ***
## `% Asian`    0.13699    0.01807   7.582  1.4e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.307 on 567 degrees of freedom
## Multiple R-squared:  0.09205,    Adjusted R-squared:  0.09045 
## F-statistic: 57.49 on 1 and 567 DF,  p-value: 1.399e-13
asian_model_frac <- lm(frac_increase ~ `% Asian`, sj_dem_distancing_pre_post)
summary(asian_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% Asian`, data = sj_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7656 -0.4570 -0.1305  0.2997  2.8810 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.098222   0.050519  21.739  < 2e-16 ***
## `% Asian`   0.005124   0.001296   3.954 8.66e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6675 on 567 degrees of freedom
## Multiple R-squared:  0.02683,    Adjusted R-squared:  0.02512 
## F-statistic: 15.63 on 1 and 567 DF,  p-value: 8.659e-05
# hispanic/latino
sj_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% non hispanic/latino`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents that are not Hispanic or Latino",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "San Jose: Social Distancing and Hispanic/Latino Residents"
  )

hisp_model_dif <- lm(`% increase in staying completely home` ~ `% non hispanic/latino`, sj_dem_distancing_pre_post)
summary(hisp_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% non hispanic/latino`, 
##     data = sj_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.294  -3.995   0.915   5.057  24.024 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             10.49417    1.10795   9.472   <2e-16 ***
## `% non hispanic/latino`  0.22772    0.01546  14.730   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.306 on 567 degrees of freedom
## Multiple R-squared:  0.2768, Adjusted R-squared:  0.2755 
## F-statistic:   217 on 1 and 567 DF,  p-value: < 2.2e-16
hisp_model_frac <- lm(frac_increase ~ `% non hispanic/latino`, sj_dem_distancing_pre_post)
summary(hisp_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% non hispanic/latino`, data = sj_dem_distancing_pre_post)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.72544 -0.34878 -0.02194  0.29341  2.50892 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             0.198857   0.076969   2.584     0.01 *  
## `% non hispanic/latino` 0.015663   0.001074  14.584   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.577 on 567 degrees of freedom
## Multiple R-squared:  0.2728, Adjusted R-squared:  0.2715 
## F-statistic: 212.7 on 1 and 567 DF,  p-value: < 2.2e-16

Multiple regression for increases in staying at home

Multiple regression analysis: income, internet, Spanish language ability, and occupants per room.

First with difference in percents.

difs_model <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% not speaking spanish` + sj_dem_distancing_pre_post$`percent less than 1` + sj_dem_distancing_pre_post$`percent high speed`)
summary(difs_model)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% not speaking spanish` + 
##         sj_dem_distancing_pre_post$`percent less than 1` + sj_dem_distancing_pre_post$`percent high speed`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.907  -3.949   0.728   4.776  21.252 
## 
## Coefficients:
##                                                     Estimate Std. Error t value
## (Intercept)                                          5.63762    3.24947   1.735
## sj_dem_distancing_pre_post$`% over 125,000`          0.23198    0.02430   9.546
## sj_dem_distancing_pre_post$`% not speaking spanish`  0.08128    0.02515   3.231
## sj_dem_distancing_pre_post$`percent less than 1`    -0.00691    0.04337  -0.159
## sj_dem_distancing_pre_post$`percent high speed`      0.06365    0.03634   1.752
##                                                     Pr(>|t|)    
## (Intercept)                                           0.0833 .  
## sj_dem_distancing_pre_post$`% over 125,000`           <2e-16 ***
## sj_dem_distancing_pre_post$`% not speaking spanish`   0.0013 ** 
## sj_dem_distancing_pre_post$`percent less than 1`      0.8735    
## sj_dem_distancing_pre_post$`percent high speed`       0.0804 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.647 on 564 degrees of freedom
## Multiple R-squared:  0.3903, Adjusted R-squared:  0.3859 
## F-statistic: 90.24 on 4 and 564 DF,  p-value: < 2.2e-16

Second with fractional change.

frac_model <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% not speaking spanish` + sj_dem_distancing_pre_post$`percent less than 1` + sj_dem_distancing_pre_post$`percent high speed`)
summary(frac_model)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + 
##     sj_dem_distancing_pre_post$`% not speaking spanish` + sj_dem_distancing_pre_post$`percent less than 1` + 
##     sj_dem_distancing_pre_post$`percent high speed`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.93950 -0.30464  0.01285  0.26717  2.39582 
## 
## Coefficients:
##                                                      Estimate Std. Error
## (Intercept)                                         -0.116752   0.222809
## sj_dem_distancing_pre_post$`% over 125,000`          0.017337   0.001666
## sj_dem_distancing_pre_post$`% not speaking spanish`  0.005484   0.001725
## sj_dem_distancing_pre_post$`percent less than 1`     0.002277   0.002974
## sj_dem_distancing_pre_post$`percent high speed`      0.000471   0.002492
##                                                     t value Pr(>|t|)    
## (Intercept)                                          -0.524  0.60048    
## sj_dem_distancing_pre_post$`% over 125,000`          10.404  < 2e-16 ***
## sj_dem_distancing_pre_post$`% not speaking spanish`   3.179  0.00156 ** 
## sj_dem_distancing_pre_post$`percent less than 1`      0.766  0.44420    
## sj_dem_distancing_pre_post$`percent high speed`       0.189  0.85013    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5243 on 564 degrees of freedom
## Multiple R-squared:  0.4027, Adjusted R-squared:  0.3985 
## F-statistic: 95.07 on 4 and 564 DF,  p-value: < 2.2e-16

Multiple regression analysis: income and Spanish language ability

difs_model_inc_span <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% not speaking spanish`)
summary(difs_model_inc_span)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% not speaking spanish`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.789  -4.176   0.967   4.817  21.958 
## 
## Coefficients:
##                                                     Estimate Std. Error t value
## (Intercept)                                          8.64093    1.29332   6.681
## sj_dem_distancing_pre_post$`% over 125,000`          0.24999    0.02158  11.587
## sj_dem_distancing_pre_post$`% not speaking spanish`  0.09107    0.02055   4.432
##                                                     Pr(>|t|)    
## (Intercept)                                         5.68e-11 ***
## sj_dem_distancing_pre_post$`% over 125,000`          < 2e-16 ***
## sj_dem_distancing_pre_post$`% not speaking spanish` 1.12e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.654 on 566 degrees of freedom
## Multiple R-squared:  0.3869, Adjusted R-squared:  0.3847 
## F-statistic: 178.6 on 2 and 566 DF,  p-value: < 2.2e-16
frac_model_inc_span <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% not speaking spanish`)
summary(frac_model_inc_span)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + 
##     sj_dem_distancing_pre_post$`% not speaking spanish`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9503 -0.3015  0.0121  0.2680  2.4100 
## 
## Coefficients:
##                                                     Estimate Std. Error t value
## (Intercept)                                         0.044852   0.088493   0.507
## sj_dem_distancing_pre_post$`% over 125,000`         0.017759   0.001476  12.030
## sj_dem_distancing_pre_post$`% not speaking spanish` 0.006307   0.001406   4.486
##                                                     Pr(>|t|)    
## (Intercept)                                            0.612    
## sj_dem_distancing_pre_post$`% over 125,000`          < 2e-16 ***
## sj_dem_distancing_pre_post$`% not speaking spanish` 8.77e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5237 on 566 degrees of freedom
## Multiple R-squared:  0.402,  Adjusted R-squared:  0.3999 
## F-statistic: 190.2 on 2 and 566 DF,  p-value: < 2.2e-16

When only accounting for for income, Spanish language ability is only slightly relevant, though the result is still nontrivial. Let’s try accounting for both education and income level.

Multiple regression analysis: income, education, and Spanish language ability

difs_model_inc_span_educ <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% not speaking spanish` +  sj_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_span_educ)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% not speaking spanish` + 
##         sj_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.546  -3.593   0.911   4.910  21.396 
## 
## Coefficients:
##                                                           Estimate Std. Error
## (Intercept)                                                9.68265    1.32021
## sj_dem_distancing_pre_post$`% over 125,000`                0.21515    0.02384
## sj_dem_distancing_pre_post$`% not speaking spanish`        0.03589    0.02633
## sj_dem_distancing_pre_post$`percent associates or higher`  0.09922    0.02999
##                                                           t value Pr(>|t|)    
## (Intercept)                                                 7.334 7.78e-13 ***
## sj_dem_distancing_pre_post$`% over 125,000`                 9.025  < 2e-16 ***
## sj_dem_distancing_pre_post$`% not speaking spanish`         1.363 0.173282    
## sj_dem_distancing_pre_post$`percent associates or higher`   3.308 0.000999 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.588 on 565 degrees of freedom
## Multiple R-squared:  0.3986, Adjusted R-squared:  0.3954 
## F-statistic: 124.8 on 3 and 565 DF,  p-value: < 2.2e-16
frac_model_inc_span_educ <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% not speaking spanish` +  sj_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_span_educ)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + 
##     sj_dem_distancing_pre_post$`% not speaking spanish` + sj_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.89671 -0.29727  0.01081  0.26240  2.44073 
## 
## Coefficients:
##                                                           Estimate Std. Error
## (Intercept)                                               0.113017   0.090407
## sj_dem_distancing_pre_post$`% over 125,000`               0.015480   0.001633
## sj_dem_distancing_pre_post$`% not speaking spanish`       0.002697   0.001803
## sj_dem_distancing_pre_post$`percent associates or higher` 0.006492   0.002054
##                                                           t value Pr(>|t|)    
## (Intercept)                                                 1.250  0.21178    
## sj_dem_distancing_pre_post$`% over 125,000`                 9.482  < 2e-16 ***
## sj_dem_distancing_pre_post$`% not speaking spanish`         1.496  0.13523    
## sj_dem_distancing_pre_post$`percent associates or higher`   3.161  0.00166 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5196 on 565 degrees of freedom
## Multiple R-squared:  0.4124, Adjusted R-squared:  0.4093 
## F-statistic: 132.2 on 3 and 565 DF,  p-value: < 2.2e-16

The effect of Spanish language speaking vanishes when accounting for both education and income.

Multiple regression analysis: income, English language ability and education

difs_model_inc_eng_educ <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% speaking english > well` +  sj_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_eng_educ)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% speaking english > well` + 
##         sj_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -37.522  -3.601   1.026   4.761  21.422 
## 
## Coefficients:
##                                                           Estimate Std. Error
## (Intercept)                                               24.65467    3.64708
## sj_dem_distancing_pre_post$`% over 125,000`                0.23731    0.02372
## sj_dem_distancing_pre_post$`% speaking english > well`    -0.18905    0.04927
## sj_dem_distancing_pre_post$`percent associates or higher`  0.17724    0.02666
##                                                           t value Pr(>|t|)    
## (Intercept)                                                 6.760 3.44e-11 ***
## sj_dem_distancing_pre_post$`% over 125,000`                10.005  < 2e-16 ***
## sj_dem_distancing_pre_post$`% speaking english > well`     -3.837 0.000138 ***
## sj_dem_distancing_pre_post$`percent associates or higher`   6.647 7.05e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.503 on 565 degrees of freedom
## Multiple R-squared:  0.4119, Adjusted R-squared:  0.4088 
## F-statistic: 131.9 on 3 and 565 DF,  p-value: < 2.2e-16
frac_model_inc_eng_educ <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% speaking english > well` +  sj_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_eng_educ)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + 
##     sj_dem_distancing_pre_post$`% speaking english > well` + 
##     sj_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.88802 -0.28472  0.01245  0.25950  2.45681 
## 
## Coefficients:
##                                                            Estimate Std. Error
## (Intercept)                                                0.513333   0.252739
## sj_dem_distancing_pre_post$`% over 125,000`                0.016230   0.001644
## sj_dem_distancing_pre_post$`% speaking english > well`    -0.004138   0.003414
## sj_dem_distancing_pre_post$`percent associates or higher`  0.009579   0.001848
##                                                           t value Pr(>|t|)    
## (Intercept)                                                 2.031   0.0427 *  
## sj_dem_distancing_pre_post$`% over 125,000`                 9.874  < 2e-16 ***
## sj_dem_distancing_pre_post$`% speaking english > well`     -1.212   0.2261    
## sj_dem_distancing_pre_post$`percent associates or higher`   5.184 3.02e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.52 on 565 degrees of freedom
## Multiple R-squared:  0.4116, Adjusted R-squared:  0.4085 
## F-statistic: 131.7 on 3 and 565 DF,  p-value: < 2.2e-16

English language ability is actually a slightly better predictor than Spanish language ability, when also accounting for education and income.

Multiple regression analysis: income, English language ability, education, Spanish language ability, and vehicle ownership

difs_model_lots <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% speaking english > well` +  sj_dem_distancing_pre_post$`percent associates or higher` + sj_dem_distancing_pre_post$`% not speaking spanish` + sj_dem_distancing_pre_post$`percent with vehicles`)
summary(difs_model_lots)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% speaking english > well` + 
##         sj_dem_distancing_pre_post$`percent associates or higher` + 
##         sj_dem_distancing_pre_post$`% not speaking spanish` + 
##         sj_dem_distancing_pre_post$`percent with vehicles`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.033  -3.603   1.120   4.735  21.510 
## 
## Coefficients:
##                                                           Estimate Std. Error
## (Intercept)                                               16.12538    5.39958
## sj_dem_distancing_pre_post$`% over 125,000`                0.21512    0.02548
## sj_dem_distancing_pre_post$`% speaking english > well`    -0.20150    0.04933
## sj_dem_distancing_pre_post$`percent associates or higher`  0.15078    0.03199
## sj_dem_distancing_pre_post$`% not speaking spanish`        0.04632    0.02605
## sj_dem_distancing_pre_post$`percent with vehicles`         0.08623    0.04629
##                                                           t value Pr(>|t|)    
## (Intercept)                                                 2.986  0.00295 ** 
## sj_dem_distancing_pre_post$`% over 125,000`                 8.442 2.65e-16 ***
## sj_dem_distancing_pre_post$`% speaking english > well`     -4.085 5.06e-05 ***
## sj_dem_distancing_pre_post$`percent associates or higher`   4.713 3.08e-06 ***
## sj_dem_distancing_pre_post$`% not speaking spanish`         1.778  0.07588 .  
## sj_dem_distancing_pre_post$`percent with vehicles`          1.863  0.06298 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.475 on 563 degrees of freedom
## Multiple R-squared:  0.4184, Adjusted R-squared:  0.4133 
## F-statistic: 81.01 on 5 and 563 DF,  p-value: < 2.2e-16
frac_model_lots <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% speaking english > well` +  sj_dem_distancing_pre_post$`percent associates or higher` + sj_dem_distancing_pre_post$`% not speaking spanish` + sj_dem_distancing_pre_post$`percent with vehicles`)
summary(frac_model_lots)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + 
##     sj_dem_distancing_pre_post$`% speaking english > well` + 
##     sj_dem_distancing_pre_post$`percent associates or higher` + 
##     sj_dem_distancing_pre_post$`% not speaking spanish` + sj_dem_distancing_pre_post$`percent with vehicles`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.89482 -0.29338  0.00822  0.26264  2.43136 
## 
## Coefficients:
##                                                            Estimate Std. Error
## (Intercept)                                                0.250335   0.375279
## sj_dem_distancing_pre_post$`% over 125,000`                0.015450   0.001771
## sj_dem_distancing_pre_post$`% speaking english > well`    -0.004706   0.003429
## sj_dem_distancing_pre_post$`percent associates or higher`  0.007699   0.002224
## sj_dem_distancing_pre_post$`% not speaking spanish`        0.002944   0.001810
## sj_dem_distancing_pre_post$`percent with vehicles`         0.002160   0.003217
##                                                           t value Pr(>|t|)    
## (Intercept)                                                 0.667 0.505005    
## sj_dem_distancing_pre_post$`% over 125,000`                 8.724  < 2e-16 ***
## sj_dem_distancing_pre_post$`% speaking english > well`     -1.373 0.170446    
## sj_dem_distancing_pre_post$`percent associates or higher`   3.463 0.000576 ***
## sj_dem_distancing_pre_post$`% not speaking spanish`         1.626 0.104404    
## sj_dem_distancing_pre_post$`percent with vehicles`          0.671 0.502199    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5195 on 563 degrees of freedom
## Multiple R-squared:  0.4147, Adjusted R-squared:  0.4095 
## F-statistic: 79.78 on 5 and 563 DF,  p-value: < 2.2e-16

The main important variables are education and income, with potentially some effect of English language ability.

Multiple regression analysis: income and education

difs_model_inc_educ <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` +  sj_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_educ)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.464  -3.624   1.004   4.865  21.188 
## 
## Coefficients:
##                                                           Estimate Std. Error
## (Intercept)                                               11.04674    0.86206
## sj_dem_distancing_pre_post$`% over 125,000`                0.22014    0.02358
## sj_dem_distancing_pre_post$`percent associates or higher`  0.12512    0.02322
##                                                           t value Pr(>|t|)    
## (Intercept)                                                12.814  < 2e-16 ***
## sj_dem_distancing_pre_post$`% over 125,000`                 9.338  < 2e-16 ***
## sj_dem_distancing_pre_post$`percent associates or higher`   5.388 1.05e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.594 on 566 degrees of freedom
## Multiple R-squared:  0.3966, Adjusted R-squared:  0.3945 
## F-statistic:   186 on 2 and 566 DF,  p-value: < 2.2e-16
frac_model_inc_educ <- lm(sj_dem_distancing_pre_post$`frac_increase` ~ sj_dem_distancing_pre_post$`% over 125,000` +  sj_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_educ)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + 
##     sj_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.88364 -0.28571  0.01015  0.26101  2.47104 
## 
## Coefficients:
##                                                           Estimate Std. Error
## (Intercept)                                               0.215507   0.059053
## sj_dem_distancing_pre_post$`% over 125,000`               0.015854   0.001615
## sj_dem_distancing_pre_post$`percent associates or higher` 0.008439   0.001591
##                                                           t value Pr(>|t|)    
## (Intercept)                                                 3.649 0.000287 ***
## sj_dem_distancing_pre_post$`% over 125,000`                 9.817  < 2e-16 ***
## sj_dem_distancing_pre_post$`percent associates or higher`   5.305 1.62e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5202 on 566 degrees of freedom
## Multiple R-squared:  0.4101, Adjusted R-squared:  0.408 
## F-statistic: 196.7 on 2 and 566 DF,  p-value: < 2.2e-16

Comparing this to earlier models, we see that adding the English language ability variable does add some predictive power, though not much, and adding the vehicle ownership and Spanish language ability variables have negligible effects.

We now consider adding race into the regressions.

Multiple regression analysis: Hispanic/Latino and income

difs_model_inc_hisp <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` +  sj_dem_distancing_pre_post$`% non hispanic/latino`)
summary(difs_model_inc_hisp)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% non hispanic/latino`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.908  -3.877   0.969   5.002  20.996 
## 
## Coefficients:
##                                                    Estimate Std. Error t value
## (Intercept)                                         9.51458    1.01569   9.368
## sj_dem_distancing_pre_post$`% over 125,000`         0.23251    0.02175  10.688
## sj_dem_distancing_pre_post$`% non hispanic/latino`  0.10178    0.01839   5.535
##                                                    Pr(>|t|)    
## (Intercept)                                         < 2e-16 ***
## sj_dem_distancing_pre_post$`% over 125,000`         < 2e-16 ***
## sj_dem_distancing_pre_post$`% non hispanic/latino` 4.76e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.584 on 566 degrees of freedom
## Multiple R-squared:  0.3982, Adjusted R-squared:  0.3961 
## F-statistic: 187.3 on 2 and 566 DF,  p-value: < 2.2e-16
frac_model_inc_hisp <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +  sj_dem_distancing_pre_post$`% non hispanic/latino`)
summary(frac_model_inc_hisp)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + 
##     sj_dem_distancing_pre_post$`% non hispanic/latino`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9513 -0.3009  0.0227  0.2690  2.4414 
## 
## Coefficients:
##                                                    Estimate Std. Error t value
## (Intercept)                                        0.127242   0.069791   1.823
## sj_dem_distancing_pre_post$`% over 125,000`        0.016998   0.001495  11.371
## sj_dem_distancing_pre_post$`% non hispanic/latino` 0.006456   0.001263   5.110
##                                                    Pr(>|t|)    
## (Intercept)                                          0.0688 .  
## sj_dem_distancing_pre_post$`% over 125,000`         < 2e-16 ***
## sj_dem_distancing_pre_post$`% non hispanic/latino` 4.42e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5211 on 566 degrees of freedom
## Multiple R-squared:  0.408,  Adjusted R-squared:  0.4059 
## F-statistic: 195.1 on 2 and 566 DF,  p-value: < 2.2e-16

Multiple regression analysis: Hispanic/Latino, income, and education

difs_model_inc_hisp_educ <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` +  sj_dem_distancing_pre_post$`% non hispanic/latino` + sj_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_hisp_educ)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% non hispanic/latino` + 
##         sj_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.899  -3.816   0.963   4.863  20.971 
## 
## Coefficients:
##                                                           Estimate Std. Error
## (Intercept)                                                9.66595    1.01443
## sj_dem_distancing_pre_post$`% over 125,000`                0.21097    0.02374
## sj_dem_distancing_pre_post$`% non hispanic/latino`         0.06381    0.02502
## sj_dem_distancing_pre_post$`percent associates or higher`  0.07032    0.03155
##                                                           t value Pr(>|t|)    
## (Intercept)                                                 9.528   <2e-16 ***
## sj_dem_distancing_pre_post$`% over 125,000`                 8.888   <2e-16 ***
## sj_dem_distancing_pre_post$`% non hispanic/latino`          2.551   0.0110 *  
## sj_dem_distancing_pre_post$`percent associates or higher`   2.229   0.0262 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.557 on 565 degrees of freedom
## Multiple R-squared:  0.4035, Adjusted R-squared:  0.4003 
## F-statistic: 127.4 on 3 and 565 DF,  p-value: < 2.2e-16
frac_model_inc_hisp_educ <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +  sj_dem_distancing_pre_post$`% non hispanic/latino` + sj_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_hisp_educ)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + 
##     sj_dem_distancing_pre_post$`% non hispanic/latino` + sj_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.90598 -0.28645  0.01607  0.25376  2.45075 
## 
## Coefficients:
##                                                           Estimate Std. Error
## (Intercept)                                               0.138858   0.069628
## sj_dem_distancing_pre_post$`% over 125,000`               0.015345   0.001629
## sj_dem_distancing_pre_post$`% non hispanic/latino`        0.003542   0.001717
## sj_dem_distancing_pre_post$`percent associates or higher` 0.005397   0.002166
##                                                           t value Pr(>|t|)    
## (Intercept)                                                 1.994   0.0466 *  
## sj_dem_distancing_pre_post$`% over 125,000`                 9.419   <2e-16 ***
## sj_dem_distancing_pre_post$`% non hispanic/latino`          2.063   0.0396 *  
## sj_dem_distancing_pre_post$`percent associates or higher`   2.492   0.0130 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5187 on 565 degrees of freedom
## Multiple R-squared:  0.4145, Adjusted R-squared:  0.4114 
## F-statistic: 133.3 on 3 and 565 DF,  p-value: < 2.2e-16

When including education, percentage of Hispanic/Latino residents loses as much of its predictive power.

Multiple regression analysis: income, education, and white residents

difs_model_inc_white_educ <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` +  sj_dem_distancing_pre_post$`% white` + sj_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_white_educ)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% white` + 
##         sj_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.270  -3.744   1.144   4.781  18.169 
## 
## Coefficients:
##                                                           Estimate Std. Error
## (Intercept)                                               12.66041    0.93132
## sj_dem_distancing_pre_post$`% over 125,000`                0.22811    0.02331
## sj_dem_distancing_pre_post$`% white`                      -0.06482    0.01533
## sj_dem_distancing_pre_post$`percent associates or higher`  0.14315    0.02328
##                                                           t value Pr(>|t|)    
## (Intercept)                                                13.594  < 2e-16 ***
## sj_dem_distancing_pre_post$`% over 125,000`                 9.787  < 2e-16 ***
## sj_dem_distancing_pre_post$`% white`                       -4.227 2.76e-05 ***
## sj_dem_distancing_pre_post$`percent associates or higher`   6.150 1.47e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.483 on 565 degrees of freedom
## Multiple R-squared:  0.4151, Adjusted R-squared:  0.412 
## F-statistic: 133.7 on 3 and 565 DF,  p-value: < 2.2e-16
frac_model_inc_white_educ <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +  sj_dem_distancing_pre_post$`% white` + sj_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_white_educ)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + 
##     sj_dem_distancing_pre_post$`% white` + sj_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.88298 -0.28557  0.01044  0.26052  2.47163 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                2.160e-01  6.480e-02
## sj_dem_distancing_pre_post$`% over 125,000`                1.586e-02  1.622e-03
## sj_dem_distancing_pre_post$`% white`                      -1.916e-05  1.067e-03
## sj_dem_distancing_pre_post$`percent associates or higher`  8.444e-03  1.620e-03
##                                                           t value Pr(>|t|)    
## (Intercept)                                                 3.333 0.000915 ***
## sj_dem_distancing_pre_post$`% over 125,000`                 9.778  < 2e-16 ***
## sj_dem_distancing_pre_post$`% white`                       -0.018 0.985682    
## sj_dem_distancing_pre_post$`percent associates or higher`   5.214  2.6e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5207 on 565 degrees of freedom
## Multiple R-squared:  0.4101, Adjusted R-squared:  0.4069 
## F-statistic: 130.9 on 3 and 565 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, education, and Asian residents

difs_model_inc_asian_educ <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` +  sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_asian_educ)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% Asian` + 
##         sj_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.625  -3.797   1.051   4.789  17.718 
## 
## Coefficients:
##                                                           Estimate Std. Error
## (Intercept)                                                9.59406    0.88983
## sj_dem_distancing_pre_post$`% over 125,000`                0.21681    0.02308
## sj_dem_distancing_pre_post$`% Asian`                       0.07632    0.01489
## sj_dem_distancing_pre_post$`percent associates or higher`  0.10630    0.02302
##                                                           t value Pr(>|t|)    
## (Intercept)                                                10.782  < 2e-16 ***
## sj_dem_distancing_pre_post$`% over 125,000`                 9.396  < 2e-16 ***
## sj_dem_distancing_pre_post$`% Asian`                        5.124 4.11e-07 ***
## sj_dem_distancing_pre_post$`percent associates or higher`   4.619 4.79e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.43 on 565 degrees of freedom
## Multiple R-squared:  0.4234, Adjusted R-squared:  0.4203 
## F-statistic: 138.3 on 3 and 565 DF,  p-value: < 2.2e-16
frac_model_inc_asian_educ <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +  sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_asian_educ)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + 
##     sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.86840 -0.28654  0.01026  0.26108  2.48279 
## 
## Coefficients:
##                                                            Estimate Std. Error
## (Intercept)                                               0.2045490  0.0623384
## sj_dem_distancing_pre_post$`% over 125,000`               0.0158292  0.0016166
## sj_dem_distancing_pre_post$`% Asian`                      0.0005757  0.0010435
## sj_dem_distancing_pre_post$`percent associates or higher` 0.0082967  0.0016124
##                                                           t value Pr(>|t|)    
## (Intercept)                                                 3.281   0.0011 ** 
## sj_dem_distancing_pre_post$`% over 125,000`                 9.792  < 2e-16 ***
## sj_dem_distancing_pre_post$`% Asian`                        0.552   0.5814    
## sj_dem_distancing_pre_post$`percent associates or higher`   5.146 3.69e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5205 on 565 degrees of freedom
## Multiple R-squared:  0.4104, Adjusted R-squared:  0.4072 
## F-statistic: 131.1 on 3 and 565 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, education, Asian residents, and English language ability

difs_model_inc_asian_educ_eng <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` +  sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` + sj_dem_distancing_pre_post$`% speaking english > well`)
summary(difs_model_inc_asian_educ_eng)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% Asian` + 
##         sj_dem_distancing_pre_post$`percent associates or higher` + 
##         sj_dem_distancing_pre_post$`% speaking english > well`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -36.235  -3.769   1.173   4.732  18.220 
## 
## Coefficients:
##                                                           Estimate Std. Error
## (Intercept)                                               13.97982    4.73755
## sj_dem_distancing_pre_post$`% over 125,000`                0.22256    0.02387
## sj_dem_distancing_pre_post$`% Asian`                       0.06549    0.01881
## sj_dem_distancing_pre_post$`percent associates or higher`  0.12498    0.03037
## sj_dem_distancing_pre_post$`% speaking english > well`    -0.05807    0.06161
##                                                           t value Pr(>|t|)    
## (Intercept)                                                 2.951 0.003301 ** 
## sj_dem_distancing_pre_post$`% over 125,000`                 9.324  < 2e-16 ***
## sj_dem_distancing_pre_post$`% Asian`                        3.482 0.000537 ***
## sj_dem_distancing_pre_post$`percent associates or higher`   4.115 4.45e-05 ***
## sj_dem_distancing_pre_post$`% speaking english > well`     -0.943 0.346330    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.431 on 564 degrees of freedom
## Multiple R-squared:  0.4243, Adjusted R-squared:  0.4202 
## F-statistic: 103.9 on 4 and 564 DF,  p-value: < 2.2e-16
frac_model_inc_asian_educ_eng <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +  sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` + sj_dem_distancing_pre_post$`% speaking english > well`)
summary(frac_model_inc_asian_educ_eng)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + 
##     sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` + 
##     sj_dem_distancing_pre_post$`% speaking english > well`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.89694 -0.28374  0.01348  0.25783  2.44830 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                0.5642023  0.3318014
## sj_dem_distancing_pre_post$`% over 125,000`                0.0163004  0.0016717
## sj_dem_distancing_pre_post$`% Asian`                      -0.0003121  0.0013174
## sj_dem_distancing_pre_post$`percent associates or higher`  0.0098283  0.0021272
## sj_dem_distancing_pre_post$`% speaking english > well`    -0.0047617  0.0043148
##                                                           t value Pr(>|t|)    
## (Intercept)                                                 1.700   0.0896 .  
## sj_dem_distancing_pre_post$`% over 125,000`                 9.751  < 2e-16 ***
## sj_dem_distancing_pre_post$`% Asian`                       -0.237   0.8128    
## sj_dem_distancing_pre_post$`percent associates or higher`   4.620 4.75e-06 ***
## sj_dem_distancing_pre_post$`% speaking english > well`     -1.104   0.2702    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5204 on 564 degrees of freedom
## Multiple R-squared:  0.4116, Adjusted R-squared:  0.4075 
## F-statistic: 98.65 on 4 and 564 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, education, Asian residents, and residents ages 20-29

difs_model_inc_asian_educ_youngadult <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` +  sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` + sj_dem_distancing_pre_post$`percent 20-29`)
summary(difs_model_inc_asian_educ_youngadult)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% Asian` + 
##         sj_dem_distancing_pre_post$`percent associates or higher` + 
##         sj_dem_distancing_pre_post$`percent 20-29`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.640  -3.789   1.044   4.777  17.351 
## 
## Coefficients:
##                                                           Estimate Std. Error
## (Intercept)                                               14.00086    1.17184
## sj_dem_distancing_pre_post$`% over 125,000`                0.17687    0.02359
## sj_dem_distancing_pre_post$`% Asian`                       0.07910    0.01452
## sj_dem_distancing_pre_post$`percent associates or higher`  0.11175    0.02244
## sj_dem_distancing_pre_post$`percent 20-29`                -0.22459    0.04018
##                                                           t value Pr(>|t|)    
## (Intercept)                                                11.948  < 2e-16 ***
## sj_dem_distancing_pre_post$`% over 125,000`                 7.497 2.54e-13 ***
## sj_dem_distancing_pre_post$`% Asian`                        5.448 7.63e-08 ***
## sj_dem_distancing_pre_post$`percent associates or higher`   4.979 8.51e-07 ***
## sj_dem_distancing_pre_post$`percent 20-29`                 -5.589 3.56e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.239 on 564 degrees of freedom
## Multiple R-squared:  0.4536, Adjusted R-squared:  0.4498 
## F-statistic: 117.1 on 4 and 564 DF,  p-value: < 2.2e-16
frac_model_inc_asian_educ_youngadult <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +  sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` + sj_dem_distancing_pre_post$`percent 20-29`)
summary(frac_model_inc_asian_educ_youngadult)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + 
##     sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` + 
##     sj_dem_distancing_pre_post$`percent 20-29`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.01457 -0.27025  0.00498  0.25510  2.34055 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                0.5500038  0.0815206
## sj_dem_distancing_pre_post$`% over 125,000`                0.0126975  0.0016411
## sj_dem_distancing_pre_post$`% Asian`                       0.0007937  0.0010101
## sj_dem_distancing_pre_post$`percent associates or higher`  0.0087238  0.0015614
## sj_dem_distancing_pre_post$`percent 20-29`                -0.0176061  0.0027954
##                                                           t value Pr(>|t|)    
## (Intercept)                                                 6.747 3.76e-11 ***
## sj_dem_distancing_pre_post$`% over 125,000`                 7.737 4.71e-14 ***
## sj_dem_distancing_pre_post$`% Asian`                        0.786    0.432    
## sj_dem_distancing_pre_post$`percent associates or higher`   5.587 3.59e-08 ***
## sj_dem_distancing_pre_post$`percent 20-29`                 -6.298 6.06e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5036 on 564 degrees of freedom
## Multiple R-squared:  0.4491, Adjusted R-squared:  0.4452 
## F-statistic:   115 on 4 and 564 DF,  p-value: < 2.2e-16

Though looking at percent less than 30 doesn’t have predictive power with these variables, percent of young adults does.

Multiple regression analysis: income, education, Asian residents, and residents less than 18

difs_model_inc_asian_educ_child <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` +  sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` + sj_dem_distancing_pre_post$`percent less than 18`)
summary(difs_model_inc_asian_educ_child)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% Asian` + 
##         sj_dem_distancing_pre_post$`percent associates or higher` + 
##         sj_dem_distancing_pre_post$`percent less than 18`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.129  -3.433   1.176   4.399  16.517 
## 
## Coefficients:
##                                                           Estimate Std. Error
## (Intercept)                                                2.12353    1.44683
## sj_dem_distancing_pre_post$`% over 125,000`                0.18738    0.02276
## sj_dem_distancing_pre_post$`% Asian`                       0.08487    0.01445
## sj_dem_distancing_pre_post$`percent associates or higher`  0.14284    0.02295
## sj_dem_distancing_pre_post$`percent less than 18`          0.29721    0.04630
##                                                           t value Pr(>|t|)    
## (Intercept)                                                 1.468    0.143    
## sj_dem_distancing_pre_post$`% over 125,000`                 8.232 1.28e-15 ***
## sj_dem_distancing_pre_post$`% Asian`                        5.872 7.34e-09 ***
## sj_dem_distancing_pre_post$`percent associates or higher`   6.223 9.53e-10 ***
## sj_dem_distancing_pre_post$`percent less than 18`           6.420 2.90e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.179 on 564 degrees of freedom
## Multiple R-squared:  0.4626, Adjusted R-squared:  0.4588 
## F-statistic: 121.4 on 4 and 564 DF,  p-value: < 2.2e-16
frac_model_inc_asian_educ_child <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +  sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` + sj_dem_distancing_pre_post$`percent less than 18`)
summary(frac_model_inc_asian_educ_child)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + 
##     sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` + 
##     sj_dem_distancing_pre_post$`percent less than 18`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.95699 -0.26982  0.01172  0.25011  2.62023 
## 
## Coefficients:
##                                                            Estimate Std. Error
## (Intercept)                                               -0.284226   0.101833
## sj_dem_distancing_pre_post$`% over 125,000`                0.013903   0.001602
## sj_dem_distancing_pre_post$`% Asian`                       0.001135   0.001017
## sj_dem_distancing_pre_post$`percent associates or higher`  0.010688   0.001616
## sj_dem_distancing_pre_post$`percent less than 18`          0.019445   0.003258
##                                                           t value Pr(>|t|)    
## (Intercept)                                                -2.791  0.00543 ** 
## sj_dem_distancing_pre_post$`% over 125,000`                 8.678  < 2e-16 ***
## sj_dem_distancing_pre_post$`% Asian`                        1.116  0.26481    
## sj_dem_distancing_pre_post$`percent associates or higher`   6.615 8.63e-11 ***
## sj_dem_distancing_pre_post$`percent less than 18`           5.968 4.25e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5053 on 564 degrees of freedom
## Multiple R-squared:  0.4454, Adjusted R-squared:  0.4415 
## F-statistic: 113.2 on 4 and 564 DF,  p-value: < 2.2e-16

Similarly, looking at percent of children is relevant as well.

Multiple regression analysis: income, education, Asian residents, and residents less than 18 and ages 20-29

difs_model_inc_asian_educ_child_yad <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` +  sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` + sj_dem_distancing_pre_post$`percent less than 18` + sj_dem_distancing_pre_post$`percent 20-29`)
summary(difs_model_inc_asian_educ_child_yad)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% Asian` + 
##         sj_dem_distancing_pre_post$`percent associates or higher` + 
##         sj_dem_distancing_pre_post$`percent less than 18` + sj_dem_distancing_pre_post$`percent 20-29`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.017  -3.439   1.101   4.371  16.333 
## 
## Coefficients:
##                                                           Estimate Std. Error
## (Intercept)                                                6.67291    1.97741
## sj_dem_distancing_pre_post$`% over 125,000`                0.16845    0.02326
## sj_dem_distancing_pre_post$`% Asian`                       0.08470    0.01432
## sj_dem_distancing_pre_post$`percent associates or higher`  0.13796    0.02280
## sj_dem_distancing_pre_post$`percent less than 18`          0.22897    0.05022
## sj_dem_distancing_pre_post$`percent 20-29`                -0.14445    0.04323
##                                                           t value Pr(>|t|)    
## (Intercept)                                                 3.375 0.000790 ***
## sj_dem_distancing_pre_post$`% over 125,000`                 7.242 1.46e-12 ***
## sj_dem_distancing_pre_post$`% Asian`                        5.913 5.83e-09 ***
## sj_dem_distancing_pre_post$`percent associates or higher`   6.051 2.62e-09 ***
## sj_dem_distancing_pre_post$`percent less than 18`           4.559 6.31e-06 ***
## sj_dem_distancing_pre_post$`percent 20-29`                 -3.341 0.000889 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.115 on 563 degrees of freedom
## Multiple R-squared:  0.4731, Adjusted R-squared:  0.4684 
## F-statistic: 101.1 on 5 and 563 DF,  p-value: < 2.2e-16
frac_model_inc_asian_educ_child_yad <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +  sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` + sj_dem_distancing_pre_post$`percent less than 18` + sj_dem_distancing_pre_post$`percent 20-29`)
summary(frac_model_inc_asian_educ_child_yad)
## 
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + 
##     sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` + 
##     sj_dem_distancing_pre_post$`percent less than 18` + sj_dem_distancing_pre_post$`percent 20-29`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.03657 -0.27539  0.01018  0.25330  2.47249 
## 
## Coefficients:
##                                                            Estimate Std. Error
## (Intercept)                                                0.123296   0.138319
## sj_dem_distancing_pre_post$`% over 125,000`                0.012207   0.001627
## sj_dem_distancing_pre_post$`% Asian`                       0.001120   0.001002
## sj_dem_distancing_pre_post$`percent associates or higher`  0.010250   0.001595
## sj_dem_distancing_pre_post$`percent less than 18`          0.013333   0.003513
## sj_dem_distancing_pre_post$`percent 20-29`                -0.012939   0.003024
##                                                           t value Pr(>|t|)    
## (Intercept)                                                 0.891 0.373103    
## sj_dem_distancing_pre_post$`% over 125,000`                 7.503 2.45e-13 ***
## sj_dem_distancing_pre_post$`% Asian`                        1.118 0.264254    
## sj_dem_distancing_pre_post$`percent associates or higher`   6.428 2.77e-10 ***
## sj_dem_distancing_pre_post$`percent less than 18`           3.795 0.000163 ***
## sj_dem_distancing_pre_post$`percent 20-29`                 -4.279 2.21e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4977 on 563 degrees of freedom
## Multiple R-squared:  0.4629, Adjusted R-squared:  0.4581 
## F-statistic: 97.03 on 5 and 563 DF,  p-value: < 2.2e-16

Conclusion from multiple regression analyses on change in leaving home behavior

From the results presented above, we see that income (making over $125,000) predicts about 37% of the variability in percent of devices leaving the home across blockgroups. Adding in education leads to a prediction of about 40% of the variation, and including percent of residents that are Asian with both education and income adds about 2% predictive power. Adding both percent of residents that are children as well as percent of residents ages 20-29 raises the regression to predicting about 47% of the variation in the data.

Testing animating the plot

# another collection for pre shelter in place behavior
sj_dem_distancing_pre_shelter <- sj_dem_distancing %>% 
  dplyr::select(-`% not completely at home`) %>%
  left_join(sj_internet_by_block %>% dplyr::select(`% not completely at home pre shelter`, blockgroup))

# relabel column for leaving home
colnames(sj_dem_distancing_pre_shelter)[ncol(sj_dem_distancing_pre_shelter)] <- "% not completely at home"

sj_dem_distancing[is.na(sj_dem_distancing)] <- 0
sj_dem_distancing_pre_shelter[is.na(sj_dem_distancing_pre_shelter)] <- 0

# add column indicating before or after shelter in place, then bind the two sets of data
sj_dem_distancing_pre_shelter <- sj_dem_distancing_pre_shelter %>% 
  mutate(
    income_trendline =
      fitted(lm(sj_dem_distancing_pre_shelter$`% not completely at home` ~ sj_dem_distancing_pre_shelter$`% over 125,000`))
  ) %>% 
  cbind(`Before or After Shelter-in-Place` = "before")
sj_dem_distancing <-
  sj_dem_distancing %>%
  mutate(
    income_trendline =
      fitted(lm(sj_dem_distancing$`% not completely at home` ~ sj_dem_distancing$`% over 125,000`))
  ) %>% 
  cbind(`Before or After Shelter-in-Place` = "after") %>% 
  rbind(sj_dem_distancing_pre_shelter)

# try animating
fig <- 
  plot_ly (sj_dem_distancing) %>%
    add_trace(
      x = ~`% over 125,000`, 
      y = ~`% not completely at home`, 
      frame = ~`Before or After Shelter-in-Place`, 
      type = 'scatter', 
      mode = 'markers'
    ) %>% 
    add_trace(
      x = ~`% over 125,000`,
      y = ~income_trendline,
      type = 'scatter',
      mode = 'lines',
      line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
      frame = ~`Before or After Shelter-in-Place`
    ) %>% 
  animation_button(visible = F)
fig
# # save as rds
# saveRDS(sj_dem_distancing, "/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/sj_sd_dem_data.rds")


# fig <- plot_ly(sj_dem_distancing) %>% 
#   add_trace(
#     x = ~`% over 125,000`,
#     y = ~`% not completely at home`,
#     frame = ~`Before or After Shelter-in-Place`,
#     type = "scatter",
#     mode = "markers",
#     name = "Under $125,000",
#     marker = list(size = 5, color = 'rgba(50, 150, 200, 1)'),
#     visible = T
#   ) %>% 
#   add_trace(
#     x = ~`% over 125,000`,
#     y = fitted(lm(sj_dem_distancing$`% not completely at home` ~ sj_dem_distancing$`% over 125,000`)),
#     name = 'trendline',
#     mode = 'lines',
#     line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
#     frame = ~`Before or After Shelter-in-Place`,
#     visible = F
#   ) %>%
#   add_trace(
#     x = ~`% not speaking spanish`,
#     y = ~`% not completely at home`,
#     frame = ~`Before or After Shelter-in-Place`,
#     name = "speak spanish",
#     marker = list(size = 5, color = 'rgba(50, 150, 200, 1)'),
#     visible = F
#   ) %>% 
#   add_trace(
#     x = ~`% not speaking spanish`,
#     y = fitted(lm(sj_dem_distancing$`% not completely at home` ~ sj_dem_distancing$`% not speaking spanish`)),
#     name = 'trendline',
#     mode = 'lines',
#     line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
#     frame = ~`Before or After Shelter-in-Place`,
#     visible = F
#   ) %>% 
#   add_trace(
#     x = ~`percent associates or higher`,
#     y = ~`% not completely at home`,
#     frame = ~`Before or After Shelter-in-Place`,
#     name = "percent higher degree",
#     marker = list(size = 5, color = 'rgba(50, 150, 200, 1)'),
#     visible = F
#   ) %>% 
#   add_trace(
#     x = ~`percent associates or higher`,
#     y = fitted(lm(sj_dem_distancing$`% not completely at home` ~ sj_dem_distancing$`percent associates or higher`)),
#     name = 'trendline',
#     mode = 'lines',
#     line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
#     frame = ~`Before or After Shelter-in-Place`,
#     visible = F
#   ) %>%
#   add_trace(
#     x = ~`percent high speed`,
#     y = ~`% not completely at home`,
#     frame = ~`Before or After Shelter-in-Place`,
#     name = "percent high speed internet access",
#     marker = list(size = 5, color = 'rgba(50, 150, 200, 1)'),
#     visible = F
#   ) %>% 
#   add_trace(
#     x = ~`percent high speed`,
#     y = fitted(lm(sj_dem_distancing$`% not completely at home` ~ sj_dem_distancing$`percent high speed`)),
#     name = 'trendline',
#     mode = 'lines',
#     line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
#     frame = ~sj_dem_distancing$`Before or After Shelter-in-Place`,
#     visible = F
#   ) %>%
#   add_trace(
#     x = ~`percent less than 30`,
#     y = ~`% not completely at home`,
#     frame = ~`Before or After Shelter-in-Place`,
#     name = "percent less than 30",
#     marker = list(size = 5, color = 'rgba(50, 150, 200, 1)'),
#     visible = F
#   ) %>% 
#   add_trace(
#     x = ~`percent less than 30`,
#     y = fitted(lm(sj_dem_distancing$`% not completely at home` ~ sj_dem_distancing$`percent less than 30`)),
#     name = 'trendline',
#     mode = 'lines',
#     line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
#     frame = ~`Before or After Shelter-in-Place`,
#     visible = F
#   ) %>%
#   layout(
#     updatemenus = list(
#       list(
#         active = 0,
#         type = 'buttons',
#         buttons = list(
#           list(
#             label = "Households Under $125,000",
#             method = 'update',
#             args = list(list(visible = c(T, T, F, F, F, F, F, F, F, F)),
#                         list(title = "Under $125,000",
#                              xaxis = list(title = "% Households Under $125,000 in Income")))),
#           list(
#             label = "Speaking Spanish",
#             method = 'update',
#             args = list(list(visible = c(F, F, T, T, F, F, F, F, F, F)),
#                         list(title = "Not Speaking Spanish",
#                              xaxis = list(title = "% Residents Not Speaking Spanish")))),
#           list(
#             label = "Education Level",
#             method = 'update',
#             args= list(list(visible = c(F, F, F, F, T, T, F, F, F, F)),
#                        list(xaxis = list(title = "% Residents With Associate's Degree or Higher")))),
#           list(
#             label = "High Speed Internet",
#             method = 'update',
#             args= list(list(visible = c(F, F, F, F, F, F, T, T, F, F)),
#                        list(xaxis = list(title = "% Households With High Speed Internet Access")))),
#           list(
#             label = "Young Population",
#             method = 'update',
#             args= list(list(visible = c(F, F, F, F, F, F, T, T, F, F)),
#                        list(xaxis = list(title = "% Residents Under Age 30"))))
#           )
#           )
#         ),
#     yaxis = list(title = "% Residents Leaving Home", 
#                  font = list(size = 15)),
#     showlegend = FALSE
#       )
# fig

Experimentation

Experimentation with other variables and other ways of analyzing the social distancing data. First I look at a few other possible variables. I start with units in the structure.

# try getting other variables
# get data on units in structure
sj_units_in_structure_by_block <- getCensus(
    name = "acs/acs5",
    vintage = 2018,
    region = "block group:*", 
    regionin = "state:06+county:085",
    vars = "group(B25024)"
  ) %>% 
  mutate(
    blockgroup =
      paste0(state,county,tract,block_group)
  ) %>% 
  select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>% 
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  select(-variable) %>% 
  separate(label, into = c(NA, NA, "units"), sep = "!!") %>% 
  filter(!is.na(units)) %>%
  spread(key = units, value = estimate) %>%
  mutate(total_nums = `1, attached` + `1, detached` + `10 to 19` + `2` + `20 to 49`+ `3 or 4` + `5 to 9`+ `50 or more`+ `Boat, RV, van, etc.`+ `Mobile home`, `percent 20 or more` = (`20 to 49`+`50 or more`)* 100/ total_nums, `percent 1 only` = (`1, attached` + `1, detached`)*100/total_nums, `percent > 1` = 100 - `percent 1 only`) %>%
  left_join(sj_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
  filter(!is.na(device_count))

# plot 
sj_units_in_structure_by_block %>% 
  ggplot(aes(
  x = `percent 20 or more`,
  y = `% not completely at home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of structures with more than 20 units",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and 20 or More Units Per Structure"
  )

summary(lm(`% not completely at home` ~ `percent 20 or more`, sj_units_in_structure_by_block))
## 
## Call:
## lm(formula = `% not completely at home` ~ `percent 20 or more`, 
##     data = sj_units_in_structure_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.909  -4.929  -0.410   4.435  36.508 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          51.40851    0.40035 128.409   <2e-16 ***
## `percent 20 or more`  0.01692    0.02006   0.843    0.399    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.273 on 566 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.001255,   Adjusted R-squared:  -0.0005093 
## F-statistic: 0.7114 on 1 and 566 DF,  p-value: 0.3994
sj_units_in_structure_by_block %>% 
  ggplot(aes(
  x = `percent 1 only`,
  y = `% not completely at home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of structures with only one unit",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and Only 1 Unit Per Structure"
  )

summary(lm(`% not completely at home` ~ `percent 1 only`, sj_units_in_structure_by_block))
## 
## Call:
## lm(formula = `% not completely at home` ~ `percent 1 only`, data = sj_units_in_structure_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.823  -5.080  -0.176   4.466  36.788 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      55.52941    0.87525  63.444  < 2e-16 ***
## `percent 1 only` -0.05462    0.01114  -4.901 1.24e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.107 on 566 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.04072,    Adjusted R-squared:  0.03902 
## F-statistic: 24.02 on 1 and 566 DF,  p-value: 1.245e-06

Household type and size:

# load data on household type and size
sj_house_size_type_by_block <- getCensus(
    name = "acs/acs5",
    vintage = 2018,
    region = "block group:*", 
    regionin = "state:06+county:085",
    vars = "group(B11016)"
  ) %>% 
  mutate(
    blockgroup =
      paste0(state,county,tract,block_group)
  ) %>% 
  select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>% 
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  select(-variable) %>% 
  separate(label, into = c(NA, NA, "type", "size"), sep = "!!") %>% 
  filter(!is.na(type))


# household type
sj_house_type_by_block <- sj_house_size_type_by_block %>% 
  filter(is.na(size)) %>% 
  dplyr::select(-size) %>%
  spread(key = type, value = estimate) %>% 
  mutate(`total households` = `Family households` + `Nonfamily households`, `percent nonfamily` = `Nonfamily households` / `total households`) %>%
  left_join(sj_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
  filter(!is.na(device_count))

sj_house_type_by_block %>% 
  ggplot(aes(
  x = `percent nonfamily`,
  y = `% not completely at home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent nonfamily households",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and Household Type"
  )

summary(lm(`% not completely at home` ~ `percent nonfamily`, sj_house_type_by_block))
## 
## Call:
## lm(formula = `% not completely at home` ~ `percent nonfamily`, 
##     data = sj_house_type_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.015  -4.963  -0.104   4.329  39.206 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          49.1942     0.6269  78.475  < 2e-16 ***
## `percent nonfamily`   9.8578     2.1759   4.531 7.18e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.132 on 566 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.035,  Adjusted R-squared:  0.03329 
## F-statistic: 20.53 on 1 and 566 DF,  p-value: 7.181e-06
# household size
sj_house_size_by_block <- sj_house_size_type_by_block %>% 
  filter(!is.na(size)) %>% 
  dplyr::select(-type) %>%
  group_by(blockgroup, size) %>%
  summarize(`total of this size` = sum(estimate)) %>% 
  spread(key = size, value = `total of this size`) %>%
  mutate(total_nums = `1-person household` + `2-person household` + `3-person household` + `4-person household` + `5-person household`+ `6-person household` + `7-or-more person household`, `percent 5 or more` = (`5-person household`+`6-person household` + `7-or-more person household`)* 100/ total_nums, `percent 1 or 2 only` = (`1-person household` + `2-person household`)*100/total_nums) %>%
  left_join(sj_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
  filter(!is.na(device_count))

sj_house_size_by_block %>% 
  ggplot(aes(
  x = `percent 5 or more`,
  y = `% not completely at home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of households with 5 or more people",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and Households With 5 or More"
  )

summary(lm(`% not completely at home` ~ `percent 5 or more`, sj_house_size_by_block))
## 
## Call:
## lm(formula = `% not completely at home` ~ `percent 5 or more`, 
##     data = sj_house_size_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.468  -4.816  -0.329   4.212  35.424 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         50.07984    0.55422  90.360  < 2e-16 ***
## `percent 5 or more`  0.08564    0.02487   3.443 0.000617 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.192 on 566 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.02052,    Adjusted R-squared:  0.01879 
## F-statistic: 11.86 on 1 and 566 DF,  p-value: 0.0006173
sj_house_size_by_block %>% 
  ggplot(aes(
  x = `percent 1 or 2 only`,
  y = `% not completely at home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of households with 1 or 2 people",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and Small Household Size"
  )

summary(lm(`% not completely at home` ~ `percent 1 or 2 only`, sj_house_size_by_block))
## 
## Call:
## lm(formula = `% not completely at home` ~ `percent 1 or 2 only`, 
##     data = sj_house_size_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.239  -4.962  -0.228   4.518  36.539 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           50.44723    0.95826  52.645   <2e-16 ***
## `percent 1 or 2 only`  0.02520    0.01993   1.264    0.207    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.266 on 566 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.002817,   Adjusted R-squared:  0.001055 
## F-statistic: 1.599 on 1 and 566 DF,  p-value: 0.2066

Next I consider different ways of looking at the social distancing data. First I try distance traveled.

# try other ways of looking at the social distancing data
# first look at total distance traveled
sj_sd_distance <- sj_socialdistancing %>% 
  filter(date > shelter_start) %>% 
  group_by(origin_census_block_group) %>% 
  summarize(total_dist_traveled = sum(distance_traveled_from_home), device_count = sum(device_count)) %>%
  mutate(total_dist_per_device = total_dist_traveled / device_count)

sj_distance_testing <- left_join(sj_ami_by_block, sj_sd_distance, by = c("blockgroup" = "origin_census_block_group")) %>% left_join(sj_age_by_block %>% select(blockgroup, `percent less than 30`))

sj_distance_testing %>% filter(total_dist_per_device < 500)  %>% 
  ggplot(aes(
  x = `% over 75,000`,
  y = total_dist_per_device
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of housholds with incomes over $75,000 (50% AMI) annually",
    y = "Average distance traveled per device during weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and Income, Distance Metric"
  )

This is very skewed by outliers, and probably not a useful metric.

Now I consider including devices that traveled <1km as staying at (or near) home.

sj_sd_range <- sj_socialdistancing %>% 
  filter(weekend == F) %>% 
  filter(date > shelter_start) %>%
  mutate(travel_buckets_split = lapply(bucketed_distance_traveled, function(x) strsplit(x, "<1000")[[1]][2]), less_than_1km = lapply(travel_buckets_split, function(x) strsplit(x, ":")[[1]][2]), less_than_1km = lapply(less_than_1km, function(x) strsplit(x, ",")[[1]][1])) %>%
  mutate(less_than_1km = lapply(less_than_1km, function(x) str_remove(x, "[}]")))  %>% # clean a bit more
  mutate(less_than_1km = as.numeric(less_than_1km), less_than_1km = replace_na(less_than_1km, 0)) %>% 
  mutate(home_or_1km = completely_home_device_count + less_than_1km) %>% 
  group_by(origin_census_block_group) %>% 
  summarize(home_or_1km = sum(home_or_1km), device_count = sum(device_count)) %>% 
  mutate(`% Within 1km of Home` = (home_or_1km/device_count*100) %>% round(1), `% farther than 1km` = (100-`% Within 1km of Home`))

# join this with other data
sj_1km_testing <- left_join(sj_ami_by_block, sj_sd_range, by = c("blockgroup" = "origin_census_block_group")) %>% 
  left_join(sj_occupants_per_room_by_block %>% dplyr::select(`percent less than 1`, blockgroup)) %>%
  left_join(sj_age_by_block %>% dplyr::select(`percent less than 30`, blockgroup)) %>%
  left_join(sj_lang_by_block %>% dplyr::select(`% speaking english > well`, blockgroup)) 

# plot with income
sj_1km_testing %>%  
  ggplot(aes(
  x = `% over 75,000`,
  y = `% farther than 1km`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of housholds with incomes over $75,000 (50% AMI) annually",
    y = "% of devices going farther than 1km of home, weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and Income, 1km Range"
  )

summary(lm(`% farther than 1km` ~ `% over 75,000`, sj_1km_testing))
## 
## Call:
## lm(formula = `% farther than 1km` ~ `% over 75,000`, data = sj_1km_testing)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -21.452  -4.792  -0.730   4.183  33.701 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     64.50250    1.10337   58.46   <2e-16 ***
## `% over 75,000` -0.20754    0.01701  -12.20   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.365 on 566 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2083, Adjusted R-squared:  0.2069 
## F-statistic: 148.9 on 1 and 566 DF,  p-value: < 2.2e-16
# plot with age
sj_1km_testing %>%  
  ggplot(aes(
  x = `percent less than 30`,
  y = `% farther than 1km`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of people younger than 30",
    y = "Percent of devices farther than 1km of home during weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and Age, 1km Range"
  )

summary(lm(`% farther than 1km` ~ `percent less than 30`, sj_1km_testing))
## 
## Call:
## lm(formula = `% farther than 1km` ~ `percent less than 30`, data = sj_1km_testing)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.831  -4.881  -0.287   4.243  39.516 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            44.78445    1.49842  29.888  < 2e-16 ***
## `percent less than 30`  0.17835    0.03798   4.695 3.34e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.216 on 567 degrees of freedom
## Multiple R-squared:  0.03743,    Adjusted R-squared:  0.03573 
## F-statistic: 22.05 on 1 and 567 DF,  p-value: 3.34e-06
# run multiple regression model
modeltest2 <- lm(sj_1km_testing$`% farther than 1km` ~ sj_1km_testing$`% over 75,000` + sj_1km_testing$`percent less than 30` + sj_1km_testing$`% speaking english > well` + sj_1km_testing$`percent less than 1`)
summary(modeltest2)
## 
## Call:
## lm(formula = sj_1km_testing$`% farther than 1km` ~ sj_1km_testing$`% over 75,000` + 
##     sj_1km_testing$`percent less than 30` + sj_1km_testing$`% speaking english > well` + 
##     sj_1km_testing$`percent less than 1`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.379  -4.639  -0.723   4.311  33.566 
## 
## Coefficients:
##                                            Estimate Std. Error t value Pr(>|t|)
## (Intercept)                                58.92638    4.64103  12.697   <2e-16
## sj_1km_testing$`% over 75,000`             -0.21587    0.02142 -10.077   <2e-16
## sj_1km_testing$`percent less than 30`       0.03546    0.04220   0.840   0.4011
## sj_1km_testing$`% speaking english > well`  0.09863    0.04557   2.164   0.0308
## sj_1km_testing$`percent less than 1`       -0.04452    0.04627  -0.962   0.3364
##                                               
## (Intercept)                                ***
## sj_1km_testing$`% over 75,000`             ***
## sj_1km_testing$`percent less than 30`         
## sj_1km_testing$`% speaking english > well` *  
## sj_1km_testing$`percent less than 1`          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.342 on 563 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2174, Adjusted R-squared:  0.2119 
## F-statistic: 39.11 on 4 and 563 DF,  p-value: < 2.2e-16

It looks like the fit of these selected variables is slightly better for the social distancing data based on not traveling farther than 1km.

Now I also consider “non-work” behavior.

sj_nonworking_by_block <- sj_socialdistancing %>% 
  filter(weekend == F) %>% 
  filter(date > shelter_start) %>%
  mutate(nonworking = device_count - completely_home_device_count - part_time_work_behavior_devices - full_time_work_behavior_devices) %>%
  group_by(origin_census_block_group) %>%
  summarize(nonworking_count = sum(nonworking), total_device = sum(device_count)) %>% 
  mutate(nonworking_percent = nonworking_count*100 / total_device, percent_only_work_home = 100-nonworking_percent) %>%
  left_join(sj_1km_testing %>% dplyr::select(`% over 75,000`, `percent less than 30`, `% speaking english > well`, `percent less than 1`, blockgroup), by = c("origin_census_block_group" = "blockgroup"))


# plot against age and income
sj_nonworking_by_block %>%  
  ggplot(aes(
  x = `% over 75,000`,
  y = nonworking_percent
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of housholds with incomes over $75,000 (50% AMI) annually",
    y = "Percent of devices leaving home for non-work purposes during weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and Income, Nonworking Behavior"
  )

summary(lm(nonworking_percent ~ `% over 75,000`, sj_nonworking_by_block))
## 
## Call:
## lm(formula = nonworking_percent ~ `% over 75,000`, data = sj_nonworking_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.077  -4.014  -0.855   3.661  32.958 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      53.1240     1.0186   52.16   <2e-16 ***
## `% over 75,000`  -0.1633     0.0157  -10.40   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.799 on 566 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1605, Adjusted R-squared:  0.159 
## F-statistic: 108.2 on 1 and 566 DF,  p-value: < 2.2e-16
sj_nonworking_by_block %>%  
  ggplot(aes(
  x = `percent less than 30`,
  y = nonworking_percent
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of people younger than 30",
    y = "Percent of devices leaving home for non-work purposes during weekdays since shelter-in-place",
    title = "San Jose: Social Distancing and Age, Nonworking Behavior"
  )

summary(lm(nonworking_percent ~ `percent less than 30`, sj_nonworking_by_block))
## 
## Call:
## lm(formula = nonworking_percent ~ `percent less than 30`, data = sj_nonworking_by_block)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.1023  -4.1575  -0.2352   3.5644  31.4271 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            36.17063    1.32054  27.391  < 2e-16 ***
## `percent less than 30`  0.17686    0.03347   5.283 1.81e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.241 on 567 degrees of freedom
## Multiple R-squared:  0.04692,    Adjusted R-squared:  0.04524 
## F-statistic: 27.92 on 1 and 567 DF,  p-value: 1.81e-07
# multiple regression model
modeltest3 <- lm(sj_nonworking_by_block$nonworking_percent ~ sj_nonworking_by_block$`% over 75,000` + sj_nonworking_by_block$`percent less than 30` + sj_nonworking_by_block$`% speaking english > well` + sj_nonworking_by_block$`percent less than 1`)
summary(modeltest3)
## 
## Call:
## lm(formula = sj_nonworking_by_block$nonworking_percent ~ sj_nonworking_by_block$`% over 75,000` + 
##     sj_nonworking_by_block$`percent less than 30` + sj_nonworking_by_block$`% speaking english > well` + 
##     sj_nonworking_by_block$`percent less than 1`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -19.1703  -3.8834  -0.7683   3.6348  30.6714 
## 
## Coefficients:
##                                                    Estimate Std. Error t value
## (Intercept)                                        44.09474    4.26975  10.327
## sj_nonworking_by_block$`% over 75,000`             -0.16997    0.01971  -8.624
## sj_nonworking_by_block$`percent less than 30`       0.08184    0.03882   2.108
## sj_nonworking_by_block$`% speaking english > well`  0.08380    0.04192   1.999
## sj_nonworking_by_block$`percent less than 1`       -0.01255    0.04257  -0.295
##                                                    Pr(>|t|)    
## (Intercept)                                          <2e-16 ***
## sj_nonworking_by_block$`% over 75,000`               <2e-16 ***
## sj_nonworking_by_block$`percent less than 30`        0.0355 *  
## sj_nonworking_by_block$`% speaking english > well`   0.0461 *  
## sj_nonworking_by_block$`percent less than 1`         0.7682    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.755 on 563 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1758, Adjusted R-squared:   0.17 
## F-statistic: 30.03 on 4 and 563 DF,  p-value: < 2.2e-16

These variables do worse for the percent nonworking metric, which makes sense.